From f7c475b8dfc23d461a47dfac5e498f8cc96faea5 Mon Sep 17 00:00:00 2001 From: xinhui pan Date: Wed, 24 Feb 2021 11:28:08 +0800 Subject: [PATCH 0001/3804] drm/ttm: Do not add non-system domain BO into swap list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BO would be added into swap list if it is validated into system domain. If BO is validated again into non-system domain, say, VRAM domain. It actually should not be in the swap list. Acked-by: Alex Deucher Signed-off-by: xinhui pan Signed-off-by: Alex Deucher Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20210224032808.150465-1-xinhui.pan@amd.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 101a68dc615b6..799ec7a7caa4d 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -153,6 +153,8 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, swap = &ttm_bo_glob.swap_lru[bo->priority]; list_move_tail(&bo->swap, swap); + } else { + list_del_init(&bo->swap); } if (bdev->driver->del_from_lru_notify) -- GitLab From ffe8768fb8f391cb478466778c55e2110525c15c Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Thu, 15 Apr 2021 16:45:25 +0800 Subject: [PATCH 0002/3804] drm/vc4: remove unused function Fix the following clang warning: drivers/gpu/drm/vc4/vc4_vec.c:201:1: warning: unused function 'to_vc4_vec_connector' [-Wunused-function]. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/1618476325-112629-1-git-send-email-jiapeng.chong@linux.alibaba.com --- drivers/gpu/drm/vc4/vc4_vec.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_vec.c b/drivers/gpu/drm/vc4/vc4_vec.c index bd5b8eb58b180..090529d0d5dcd 100644 --- a/drivers/gpu/drm/vc4/vc4_vec.c +++ b/drivers/gpu/drm/vc4/vc4_vec.c @@ -197,12 +197,6 @@ struct vc4_vec_connector { struct drm_encoder *encoder; }; -static inline struct vc4_vec_connector * -to_vc4_vec_connector(struct drm_connector *connector) -{ - return container_of(connector, struct vc4_vec_connector, base); -} - enum vc4_vec_tv_mode_id { VC4_VEC_TV_MODE_NTSC, VC4_VEC_TV_MODE_NTSC_J, -- GitLab From 0e793ba77c18382f08e440260fe72bc6fce2a3cb Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 21 Apr 2021 11:14:02 +0100 Subject: [PATCH 0003/3804] spi: Make of_register_spi_device also set the fwnode Currently, the SPI core doesn't set the struct device fwnode pointer when it creates a new SPI device. This means when the device is registered the fwnode is NULL and the check in device_add which sets the fwnode->dev pointer is skipped. This wasn't previously an issue, however these two patches: commit 4731210c09f5 ("gpiolib: Bind gpio_device to a driver to enable fw_devlink=on by default") commit ced2af419528 ("gpiolib: Don't probe gpio_device if it's not the primary device") Added some code to the GPIO core which relies on using that fwnode->dev pointer to determine if a driver is bound to the fwnode and if not bind a stub GPIO driver. This means the GPIO providers behind SPI will get both the expected driver and this stub driver causing the stub driver to fail if it attempts to request any pin configuration. For example on my system: madera-pinctrl madera-pinctrl: pin gpio5 already requested by madera-pinctrl; cannot claim for gpiochip3 madera-pinctrl madera-pinctrl: pin-4 (gpiochip3) status -22 madera-pinctrl madera-pinctrl: could not request pin 4 (gpio5) from group aif1 on device madera-pinctrl gpio_stub_drv gpiochip3: Error applying setting, reverse things back gpio_stub_drv: probe of gpiochip3 failed with error -22 The firmware node on the device created by the GPIO framework is set through the of_node pointer hence things generally actually work, however that fwnode->dev is never set, as the check was skipped at device_add time. This fix appears to match how the I2C subsystem handles the same situation. Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20210421101402.8468-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 904a353798b64..862a9bb691298 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2047,6 +2047,7 @@ of_register_spi_device(struct spi_controller *ctlr, struct device_node *nc) /* Store a pointer to the node in the device structure */ of_node_get(nc); spi->dev.of_node = nc; + spi->dev.fwnode = of_fwnode_handle(nc); /* Register the new device */ rc = spi_add_device(spi); -- GitLab From dbaca8e56ea3f23fa215f48c2d46dd03ede06e02 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 20 Apr 2021 19:44:24 +0300 Subject: [PATCH 0004/3804] spi: Allow to have all native CSs in use along with GPIOs The commit 7d93aecdb58d ("spi: Add generic support for unused native cs with cs-gpios") excludes the valid case for the controllers that doesn't need to switch native CS in order to perform the transfer, i.e. when 0 native ... ... - 1 native GPIO + 1 GPIO ... ... where defines maximum of native CSs supported by the controller. To allow this, bail out from spi_get_gpio_descs() conditionally for the controllers which explicitly marked with SPI_MASTER_GPIO_SS. Fixes: 7d93aecdb58d ("spi: Add generic support for unused native cs with cs-gpios") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210420164425.40287-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 862a9bb691298..2e5dca20c8d00 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2612,8 +2612,9 @@ static int spi_get_gpio_descs(struct spi_controller *ctlr) } ctlr->unused_native_cs = ffz(native_cs_mask); - if (num_cs_gpios && ctlr->max_native_cs && - ctlr->unused_native_cs >= ctlr->max_native_cs) { + + if ((ctlr->flags & SPI_MASTER_GPIO_SS) && num_cs_gpios && + ctlr->max_native_cs && ctlr->unused_native_cs >= ctlr->max_native_cs) { dev_err(dev, "No unused native chip select available\n"); return -EINVAL; } -- GitLab From f60d7270c8a3d2beb1c23ae0da42497afa3584c2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 20 Apr 2021 19:44:25 +0300 Subject: [PATCH 0005/3804] spi: Avoid undefined behaviour when counting unused native CSs ffz(), that has been used to count unused native CSs, might cause undefined behaviour when called against ~0U. To fix that, open code it with ffs(~value) - 1. Fixes: 7d93aecdb58d ("spi: Add generic support for unused native cs with cs-gpios") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210420164425.40287-2-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 2e5dca20c8d00..6fe2a9509675f 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2611,7 +2611,7 @@ static int spi_get_gpio_descs(struct spi_controller *ctlr) native_cs_mask |= BIT(i); } - ctlr->unused_native_cs = ffz(native_cs_mask); + ctlr->unused_native_cs = ffs(~native_cs_mask) - 1; if ((ctlr->flags & SPI_MASTER_GPIO_SS) && num_cs_gpios && ctlr->max_native_cs && ctlr->unused_native_cs >= ctlr->max_native_cs) { -- GitLab From 9fdd04918a452980631ecc499317881c1d120b70 Mon Sep 17 00:00:00 2001 From: Dan Robertson Date: Fri, 23 Apr 2021 00:02:13 -0400 Subject: [PATCH 0006/3804] net: ieee802154: fix null deref in parse dev addr Fix a logic error that could result in a null deref if the user sets the mode incorrectly for the given addr type. Signed-off-by: Dan Robertson Acked-by: Alexander Aring Link: https://lore.kernel.org/r/20210423040214.15438-2-dan@dlrobertson.com Signed-off-by: Stefan Schmidt --- net/ieee802154/nl802154.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 05f6bd89a7dd8..0cf2374c143bd 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -1298,19 +1298,20 @@ ieee802154_llsec_parse_dev_addr(struct nlattr *nla, if (!nla || nla_parse_nested_deprecated(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla, nl802154_dev_addr_policy, NULL)) return -EINVAL; - if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] || - !attrs[NL802154_DEV_ADDR_ATTR_MODE] || - !(attrs[NL802154_DEV_ADDR_ATTR_SHORT] || - attrs[NL802154_DEV_ADDR_ATTR_EXTENDED])) + if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] || !attrs[NL802154_DEV_ADDR_ATTR_MODE]) return -EINVAL; addr->pan_id = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_PAN_ID]); addr->mode = nla_get_u32(attrs[NL802154_DEV_ADDR_ATTR_MODE]); switch (addr->mode) { case NL802154_DEV_ADDR_SHORT: + if (!attrs[NL802154_DEV_ADDR_ATTR_SHORT]) + return -EINVAL; addr->short_addr = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_SHORT]); break; case NL802154_DEV_ADDR_EXTENDED: + if (!attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]) + return -EINVAL; addr->extended_addr = nla_get_le64(attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]); break; default: -- GitLab From 6c9762a78c325107dc37d20ee21002b841679209 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Fri, 23 Apr 2021 15:54:02 +0200 Subject: [PATCH 0007/3804] ASoC: max98088: fix ni clock divider calculation The ni1/ni2 ratio formula [1] uses the pclk which is the prescaled mclk. The max98088 datasheet [2] has no such formula but table-12 equals so we can assume that it is the same for both devices. While on it make use of DIV_ROUND_CLOSEST_ULL(). [1] https://datasheets.maximintegrated.com/en/ds/MAX98089.pdf; page 86 [2] https://datasheets.maximintegrated.com/en/ds/MAX98088.pdf; page 82 Signed-off-by: Marco Felsch Link: https://lore.kernel.org/r/20210423135402.32105-1-m.felsch@pengutronix.de Signed-off-by: Mark Brown --- sound/soc/codecs/max98088.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/max98088.c b/sound/soc/codecs/max98088.c index 4be24e7f51c89..f8e49e45ce33f 100644 --- a/sound/soc/codecs/max98088.c +++ b/sound/soc/codecs/max98088.c @@ -41,6 +41,7 @@ struct max98088_priv { enum max98088_type devtype; struct max98088_pdata *pdata; struct clk *mclk; + unsigned char mclk_prescaler; unsigned int sysclk; struct max98088_cdata dai[2]; int eq_textcnt; @@ -998,13 +999,16 @@ static int max98088_dai1_hw_params(struct snd_pcm_substream *substream, /* Configure NI when operating as master */ if (snd_soc_component_read(component, M98088_REG_14_DAI1_FORMAT) & M98088_DAI_MAS) { + unsigned long pclk; + if (max98088->sysclk == 0) { dev_err(component->dev, "Invalid system clock frequency\n"); return -EINVAL; } ni = 65536ULL * (rate < 50000 ? 96ULL : 48ULL) * (unsigned long long int)rate; - do_div(ni, (unsigned long long int)max98088->sysclk); + pclk = DIV_ROUND_CLOSEST(max98088->sysclk, max98088->mclk_prescaler); + ni = DIV_ROUND_CLOSEST_ULL(ni, pclk); snd_soc_component_write(component, M98088_REG_12_DAI1_CLKCFG_HI, (ni >> 8) & 0x7F); snd_soc_component_write(component, M98088_REG_13_DAI1_CLKCFG_LO, @@ -1065,13 +1069,16 @@ static int max98088_dai2_hw_params(struct snd_pcm_substream *substream, /* Configure NI when operating as master */ if (snd_soc_component_read(component, M98088_REG_1C_DAI2_FORMAT) & M98088_DAI_MAS) { + unsigned long pclk; + if (max98088->sysclk == 0) { dev_err(component->dev, "Invalid system clock frequency\n"); return -EINVAL; } ni = 65536ULL * (rate < 50000 ? 96ULL : 48ULL) * (unsigned long long int)rate; - do_div(ni, (unsigned long long int)max98088->sysclk); + pclk = DIV_ROUND_CLOSEST(max98088->sysclk, max98088->mclk_prescaler); + ni = DIV_ROUND_CLOSEST_ULL(ni, pclk); snd_soc_component_write(component, M98088_REG_1A_DAI2_CLKCFG_HI, (ni >> 8) & 0x7F); snd_soc_component_write(component, M98088_REG_1B_DAI2_CLKCFG_LO, @@ -1113,8 +1120,10 @@ static int max98088_dai_set_sysclk(struct snd_soc_dai *dai, */ if ((freq >= 10000000) && (freq < 20000000)) { snd_soc_component_write(component, M98088_REG_10_SYS_CLK, 0x10); + max98088->mclk_prescaler = 1; } else if ((freq >= 20000000) && (freq < 30000000)) { snd_soc_component_write(component, M98088_REG_10_SYS_CLK, 0x20); + max98088->mclk_prescaler = 2; } else { dev_err(component->dev, "Invalid master clock frequency\n"); return -EINVAL; -- GitLab From 366db3ac3cdf97e90695282b959c75d5ea58cf00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 21 Apr 2021 17:02:20 +0200 Subject: [PATCH 0008/3804] arm64: dts: renesas: aistarvision-mipi-adapter-2.1: Fix CSI40 ports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the DTS schema by explicitly stating that the input is port@0. This fixes a schema validation error but has no runtime effect as the default port number is 0 if not specified. Signed-off-by: Niklas Söderlund Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20210421150221.3202955-2-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Geert Uytterhoeven --- arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts index e7b4a929bb174..2e3d1981cac48 100644 --- a/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts +++ b/arch/arm64/boot/dts/renesas/r8a774c0-ek874-mipi-2.1.dts @@ -33,7 +33,7 @@ status = "okay"; ports { - port { + port@0 { csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; -- GitLab From 0a96c05995ef1085f9c5e6bf005a04915dd2ec6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Wed, 21 Apr 2021 17:02:21 +0200 Subject: [PATCH 0009/3804] arm64: dts: renesas: Add port@0 node for all CSI-2 nodes to dtsi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The port@0 is a mandatory port, add or move the declaration to the CSI-2 nodes top declared in dtsi files instead of depending on dts files adding them when describing the external connection. This fixes validation warnings for DTB outputs that do not connect all CSI-2 receivers to transmitters and thus declaring all port@0 nodes in dts files. Signed-off-by: Niklas Söderlund Reviewed-by: Laurent Pinchart Link: https://lore.kernel.org/r/20210421150221.3202955-3-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Geert Uytterhoeven --- ...hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi | 2 -- arch/arm64/boot/dts/renesas/r8a774a1.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a774b1.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a774c0.dtsi | 4 ++++ arch/arm64/boot/dts/renesas/r8a774e1.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a77950.dtsi | 4 ++++ arch/arm64/boot/dts/renesas/r8a77951.dtsi | 12 ++++++++++++ arch/arm64/boot/dts/renesas/r8a77960.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a77961.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a77965.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a77970.dtsi | 4 ++++ arch/arm64/boot/dts/renesas/r8a77980.dtsi | 8 ++++++++ arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts | 2 -- arch/arm64/boot/dts/renesas/r8a77990.dtsi | 4 ++++ arch/arm64/boot/dts/renesas/salvator-common.dtsi | 3 --- 15 files changed, 84 insertions(+), 7 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi index c62ddb9b2ba56..3771144a2ce49 100644 --- a/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi +++ b/arch/arm64/boot/dts/renesas/hihope-rzg2-ex-aistarvision-mipi-adapter-2.1.dtsi @@ -14,7 +14,6 @@ ports { port@0 { - reg = <0>; csi20_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; @@ -29,7 +28,6 @@ ports { port@0 { - reg = <0>; csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; diff --git a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi index d64fb8b1b86c3..46f8dbf689048 100644 --- a/arch/arm64/boot/dts/renesas/r8a774a1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774a1.dtsi @@ -2573,6 +2573,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2628,6 +2632,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi index 5b05474dc2727..d16a4be5ef77a 100644 --- a/arch/arm64/boot/dts/renesas/r8a774b1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774b1.dtsi @@ -2419,6 +2419,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2474,6 +2478,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi index 20fa3caa050e5..1aef34447abd1 100644 --- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi @@ -1823,6 +1823,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi index 8eb006cbd9af4..1f51237ab0a64 100644 --- a/arch/arm64/boot/dts/renesas/r8a774e1.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774e1.dtsi @@ -2709,6 +2709,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2764,6 +2768,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77950.dtsi b/arch/arm64/boot/dts/renesas/r8a77950.dtsi index 25b87da32eebb..b643d3079db1e 100644 --- a/arch/arm64/boot/dts/renesas/r8a77950.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77950.dtsi @@ -192,6 +192,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77951.dtsi b/arch/arm64/boot/dts/renesas/r8a77951.dtsi index 5c39152e45707..85d66d15465ab 100644 --- a/arch/arm64/boot/dts/renesas/r8a77951.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77951.dtsi @@ -3097,6 +3097,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -3152,6 +3156,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -3191,6 +3199,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77960.dtsi b/arch/arm64/boot/dts/renesas/r8a77960.dtsi index 25d947a81b294..12476e354d746 100644 --- a/arch/arm64/boot/dts/renesas/r8a77960.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77960.dtsi @@ -2761,6 +2761,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2816,6 +2820,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77961.dtsi b/arch/arm64/boot/dts/renesas/r8a77961.dtsi index ab081f14af9aa..d9804768425a7 100644 --- a/arch/arm64/boot/dts/renesas/r8a77961.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77961.dtsi @@ -2499,6 +2499,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2554,6 +2558,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77965.dtsi b/arch/arm64/boot/dts/renesas/r8a77965.dtsi index 657b20d3533bd..dcb9df861d749 100644 --- a/arch/arm64/boot/dts/renesas/r8a77965.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77965.dtsi @@ -2575,6 +2575,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -2630,6 +2634,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77970.dtsi b/arch/arm64/boot/dts/renesas/r8a77970.dtsi index 5a5d5649332a8..e8f6352c3665f 100644 --- a/arch/arm64/boot/dts/renesas/r8a77970.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77970.dtsi @@ -1106,6 +1106,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77980.dtsi b/arch/arm64/boot/dts/renesas/r8a77980.dtsi index 1ffa4a995a7ab..7b51d464de0ea 100644 --- a/arch/arm64/boot/dts/renesas/r8a77980.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77980.dtsi @@ -1439,6 +1439,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; @@ -1478,6 +1482,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts index 295d34f1d216d..4715e4a4abe06 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts +++ b/arch/arm64/boot/dts/renesas/r8a77990-ebisu.dts @@ -298,8 +298,6 @@ ports { port@0 { - reg = <0>; - csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2>; diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi index 5010f23fafcc7..0eaea58f4210d 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi @@ -1970,6 +1970,10 @@ #address-cells = <1>; #size-cells = <0>; + port@0 { + reg = <0>; + }; + port@1 { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm64/boot/dts/renesas/salvator-common.dtsi b/arch/arm64/boot/dts/renesas/salvator-common.dtsi index e18747df219f8..453ffcef24fae 100644 --- a/arch/arm64/boot/dts/renesas/salvator-common.dtsi +++ b/arch/arm64/boot/dts/renesas/salvator-common.dtsi @@ -349,7 +349,6 @@ ports { port@0 { - reg = <0>; csi20_in: endpoint { clock-lanes = <0>; data-lanes = <1>; @@ -364,8 +363,6 @@ ports { port@0 { - reg = <0>; - csi40_in: endpoint { clock-lanes = <0>; data-lanes = <1 2 3 4>; -- GitLab From d9cd78edb2e6b7e26747c0ec312be31e7ef196fe Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 22 Apr 2021 12:02:29 +0300 Subject: [PATCH 0010/3804] firmware: arm_scpi: Prevent the ternary sign expansion bug How the type promotion works in ternary expressions is a bit tricky. The problem is that scpi_clk_get_val() returns longs, "ret" is a int which holds a negative error code, and le32_to_cpu() is an unsigned int. We want the negative error code to be cast to a negative long. But because le32_to_cpu() is an u32 then "ret" is type promoted to u32 and becomes a high positive and then it is promoted to long and it is still a high positive value. Fix this by getting rid of the ternary. Link: https://lore.kernel.org/r/YIE7pdqV/h10tEAK@mwanda Fixes: 8cb7cf56c9fe ("firmware: add support for ARM System Control and Power Interface(SCPI) protocol") Reviewed-by: Cristian Marussi Signed-off-by: Dan Carpenter [sudeep.holla: changed to return 0 as clock rate on error] Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scpi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c index d0dee37ad5228..4ceba5ef78958 100644 --- a/drivers/firmware/arm_scpi.c +++ b/drivers/firmware/arm_scpi.c @@ -552,8 +552,10 @@ static unsigned long scpi_clk_get_val(u16 clk_id) ret = scpi_send_message(CMD_GET_CLOCK_VALUE, &le_clk_id, sizeof(le_clk_id), &rate, sizeof(rate)); + if (ret) + return 0; - return ret ? ret : le32_to_cpu(rate); + return le32_to_cpu(rate); } static int scpi_clk_set_val(u16 clk_id, unsigned long rate) -- GitLab From 03f840c49207e8c125b3df8c29c13137c6675d42 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Tue, 27 Apr 2021 11:30:31 +0800 Subject: [PATCH 0011/3804] firmware: arm_scmi: Remove duplicate declaration of struct scmi_protocol_handle struct scmi_protocol_handle is declared twice, let us remove the duplicate declaration. Link: https://lore.kernel.org/r/20210427033031.4580-1-wanjiabing@vivo.com Reviewed-by: Cristian Marussi Signed-off-by: Wan Jiabing [sudeep.holla: minor updates to the title and the changelog] Signed-off-by: Sudeep Holla --- drivers/firmware/arm_scmi/notify.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/firmware/arm_scmi/notify.h b/drivers/firmware/arm_scmi/notify.h index ce0324be6c71d..4e9b627edfefa 100644 --- a/drivers/firmware/arm_scmi/notify.h +++ b/drivers/firmware/arm_scmi/notify.h @@ -79,8 +79,6 @@ struct scmi_protocol_events { int scmi_notification_init(struct scmi_handle *handle); void scmi_notification_exit(struct scmi_handle *handle); - -struct scmi_protocol_handle; int scmi_register_protocol_events(const struct scmi_handle *handle, u8 proto_id, const struct scmi_protocol_handle *ph, const struct scmi_protocol_events *ee); -- GitLab From 10f76165d30bf568214e75767f2d8d8682cd4040 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 26 Apr 2021 16:53:25 -0700 Subject: [PATCH 0012/3804] drm/msm: Do not unpin/evict exported dma-buf's Our initial logic for excluding dma-bufs was not quite right. In particular we want msm_gem_get/put_pages() path used for exported dma-bufs to increment/decrement the pin-count. Also, in case the importer is vmap'ing the dma-buf, we need to be sure to update the object's status, because it is now no longer potentially evictable. Fixes: 63f17ef83428 drm/msm: Support evicting GEM objects to swap Signed-off-by: Rob Clark Link: https://lore.kernel.org/r/20210426235326.1230125-1-robdclark@gmail.com Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_gem.c | 16 +++++++++++++++- drivers/gpu/drm/msm/msm_gem.h | 4 ++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index b199942266a26..56df86e5f7400 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -190,13 +190,25 @@ struct page **msm_gem_get_pages(struct drm_gem_object *obj) } p = get_pages(obj); + + if (!IS_ERR(p)) { + msm_obj->pin_count++; + update_inactive(msm_obj); + } + msm_gem_unlock(obj); return p; } void msm_gem_put_pages(struct drm_gem_object *obj) { - /* when we start tracking the pin count, then do something here */ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + + msm_gem_lock(obj); + msm_obj->pin_count--; + GEM_WARN_ON(msm_obj->pin_count < 0); + update_inactive(msm_obj); + msm_gem_unlock(obj); } int msm_gem_mmap_obj(struct drm_gem_object *obj, @@ -646,6 +658,8 @@ static void *get_vaddr(struct drm_gem_object *obj, unsigned madv) ret = -ENOMEM; goto fail; } + + update_inactive(msm_obj); } return msm_obj->vaddr; diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index a6480d2c81b2c..03e2cc2a2ce15 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -221,7 +221,7 @@ static inline bool is_active(struct msm_gem_object *msm_obj) /* imported/exported objects are not purgeable: */ static inline bool is_unpurgeable(struct msm_gem_object *msm_obj) { - return msm_obj->base.dma_buf && msm_obj->base.import_attach; + return msm_obj->base.import_attach || msm_obj->pin_count; } static inline bool is_purgeable(struct msm_gem_object *msm_obj) @@ -271,7 +271,7 @@ static inline void mark_unpurgeable(struct msm_gem_object *msm_obj) static inline bool is_unevictable(struct msm_gem_object *msm_obj) { - return is_unpurgeable(msm_obj) || msm_obj->pin_count || msm_obj->vaddr; + return is_unpurgeable(msm_obj) || msm_obj->vaddr; } static inline void mark_evictable(struct msm_gem_object *msm_obj) -- GitLab From 4b95d371fb001185af84d177e69a23d55bd0167a Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Fri, 23 Apr 2021 21:49:26 -0400 Subject: [PATCH 0013/3804] drm/msm: fix LLC not being enabled for mmu500 targets mmu500 targets don't have a "cx_mem" region, set llc_mmio to NULL in that case to avoid the IS_ERR() condition in a6xx_llc_activate(). Fixes: 3d247123b5a1 ("drm/msm/a6xx: Add support for using system cache on MMU500 based targets") Signed-off-by: Jonathan Marek Link: https://lore.kernel.org/r/20210424014927.1661-1-jonathan@marek.ca Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index d553f62f4eeb8..b4d8e1b01ee4f 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1153,10 +1153,6 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, { struct device_node *phandle; - a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx"); - if (IS_ERR(a6xx_gpu->llc_mmio)) - return; - /* * There is a different programming path for targets with an mmu500 * attached, so detect if that is the case @@ -1166,6 +1162,11 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, of_device_is_compatible(phandle, "arm,mmu-500")); of_node_put(phandle); + if (a6xx_gpu->have_mmu500) + a6xx_gpu->llc_mmio = NULL; + else + a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx"); + a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU); a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); -- GitLab From 08811c057b3e22f7a3df3955c138a59f3b651df0 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Sat, 10 Apr 2021 04:19:01 +0300 Subject: [PATCH 0014/3804] drm/msm/dsi: dsi_phy_28nm_8960: fix uninitialized variable access The parent_name initialization was lost in refactoring, restore it now. Fixes: 5d13459650b3 ("drm/msm/dsi: push provided clocks handling into a generic code") Reported-by: kernel test robot Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Link: https://lore.kernel.org/r/20210410011901.1735866-1-dmitry.baryshkov@linaro.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c index 582b1428f9715..86e40a0d41a3b 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c @@ -405,6 +405,10 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm, struct clk_hw **prov if (!vco_name) return -ENOMEM; + parent_name = devm_kzalloc(dev, 32, GFP_KERNEL); + if (!parent_name) + return -ENOMEM; + clk_name = devm_kzalloc(dev, 32, GFP_KERNEL); if (!clk_name) return -ENOMEM; -- GitLab From 094c7f39ba4b5ae7e4c448527834428b79e3baf9 Mon Sep 17 00:00:00 2001 From: Dmitry Baryshkov Date: Mon, 12 Apr 2021 03:01:58 +0300 Subject: [PATCH 0015/3804] drm/msm/dsi: fix msm_dsi_phy_get_clk_provider return code msm_dsi_phy_get_clk_provider() always returns two provided clocks, so return 0 instead of returning incorrect -EINVAL error code. Fixes: 5d13459650b3 ("drm/msm/dsi: push provided clocks handling into a generic code") Signed-off-by: Dmitry Baryshkov Reviewed-by: Abhinav Kumar Tested-by: Jonathan Marek Link: https://lore.kernel.org/r/20210412000158.2049066-1-dmitry.baryshkov@linaro.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dsi/phy/dsi_phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index f0a2ddf96a4b9..ff7f2ec420300 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -843,7 +843,7 @@ int msm_dsi_phy_get_clk_provider(struct msm_dsi_phy *phy, if (pixel_clk_provider) *pixel_clk_provider = phy->provided_clocks->hws[DSI_PIXEL_PLL_CLK]->clk; - return -EINVAL; + return 0; } void msm_dsi_phy_pll_save_state(struct msm_dsi_phy *phy) -- GitLab From 774cda6f12d5ad11410c4cda223554c3735ee862 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 27 Apr 2021 22:59:55 +0200 Subject: [PATCH 0016/3804] dt-bindings: nvmem: mediatek: remove duplicate mt8192 line The same patch was accidentally merged twice, resulting in a duplicate line for the mt8192 SoC. Fixes: f2674c0c7488 ("dt-bindings: nvmem: mediatek: add support for MediaTek mt8192 SoC") Fixes: 2a1405a14c3a ("dt-bindings: nvmem: mediatek: add support for MediaTek mt8192 SoC") Signed-off-by: Arnd Bergmann --- Documentation/devicetree/bindings/nvmem/mtk-efuse.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt b/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt index d479ad977e24f..b6791702bcfc9 100644 --- a/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt +++ b/Documentation/devicetree/bindings/nvmem/mtk-efuse.txt @@ -9,7 +9,6 @@ Required properties: "mediatek,mt8173-efuse" or "mediatek,efuse": for MT8173 "mediatek,mt8192-efuse", "mediatek,efuse": for MT8192 "mediatek,mt8516-efuse", "mediatek,efuse": for MT8516 - "mediatek,mt8192-efuse", "mediatek,efuse": for MT8192 - reg: Should contain registers location and length = Data cells = -- GitLab From c019d92457826bb7b2091c86f36adb5de08405f9 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 23 Apr 2021 17:09:28 +0200 Subject: [PATCH 0017/3804] openrisc: Fix a memory leak 'setup_find_cpu_node()' take a reference on the node it returns. This reference must be decremented when not needed anymore, or there will be a leak. Add the missing 'of_node_put(cpu)'. Note that 'setup_cpuinfo()' that also calls this function already has a correct 'of_node_put(cpu)' at its end. Fixes: 9d02a4283e9c ("OpenRISC: Boot code") Signed-off-by: Christophe JAILLET Signed-off-by: Stafford Horne --- arch/openrisc/kernel/setup.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index 2416a9f915330..c6f9e7b9f7cb2 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -278,6 +278,8 @@ void calibrate_delay(void) pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n", loops_per_jiffy / (500000 / HZ), (loops_per_jiffy / (5000 / HZ)) % 100, loops_per_jiffy); + + of_node_put(cpu); } void __init setup_arch(char **cmdline_p) -- GitLab From a0695853e5906a9558eef9f79856e07659b7a1e6 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 28 Apr 2021 14:26:31 +0200 Subject: [PATCH 0018/3804] ASoC: stm32: do not request a new clock consummer reference This reverts commit 65d1cce726d4912793d0a84c55ecdb0ef5832130. There is problem with clk_hw_get_hw(). Using it pins the clock provider to itself, making it impossible to remove the module. Revert commit 65d1cce726d4 ("ASoC: stm32: properly get clk from the provider") until this gets sorted out. Signed-off-by: Jerome Brunet Link: https://lore.kernel.org/r/20210428122632.46244-2-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/stm/stm32_sai_sub.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sound/soc/stm/stm32_sai_sub.c b/sound/soc/stm/stm32_sai_sub.c index c1561237ee24b..3aa1cf2624020 100644 --- a/sound/soc/stm/stm32_sai_sub.c +++ b/sound/soc/stm/stm32_sai_sub.c @@ -484,10 +484,7 @@ static int stm32_sai_add_mclk_provider(struct stm32_sai_sub_data *sai) dev_err(dev, "mclk register returned %d\n", ret); return ret; } - - sai->sai_mclk = devm_clk_hw_get_clk(dev, hw, NULL); - if (IS_ERR(sai->sai_mclk)) - return PTR_ERR(sai->sai_mclk); + sai->sai_mclk = hw->clk; /* register mclk provider */ return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, hw); -- GitLab From 97c733654ab4a5ac910216b4b74e605acf3e1cce Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Wed, 28 Apr 2021 14:26:32 +0200 Subject: [PATCH 0019/3804] ASoC: da7219: do not request a new clock consummer reference This reverts commit 12f8127fe9e6154dd4197df97e44f3fd67583071. There is problem with clk_hw_get_hw(). Using it pins the clock provider to itself, making it impossible to remove the module. Revert commit 12f8127fe9e6 ("ASoC: da7219: properly get clk from the provider") until this gets sorted out. Reported-by: Pierre-Louis Bossart Signed-off-by: Jerome Brunet Tested-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210428122632.46244-3-jbrunet@baylibre.com Signed-off-by: Mark Brown --- sound/soc/codecs/da7219.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sound/soc/codecs/da7219.c b/sound/soc/codecs/da7219.c index bd3c523a86171..13009d08b09ac 100644 --- a/sound/soc/codecs/da7219.c +++ b/sound/soc/codecs/da7219.c @@ -2181,10 +2181,7 @@ static int da7219_register_dai_clks(struct snd_soc_component *component) ret); goto err; } - - da7219->dai_clks[i] = devm_clk_hw_get_clk(dev, dai_clk_hw, NULL); - if (IS_ERR(da7219->dai_clks[i])) - return PTR_ERR(da7219->dai_clks[i]); + da7219->dai_clks[i] = dai_clk_hw->clk; /* For DT setup onecell data, otherwise create lookup */ if (np) { -- GitLab From 6879e8e759bf9e05eaee85e32ca1a936e6b46da1 Mon Sep 17 00:00:00 2001 From: Vijendar Mukunda Date: Wed, 28 Apr 2021 01:53:31 +0530 Subject: [PATCH 0020/3804] ASoC: amd: fix for pcm_read() error Below phython script throwing pcm_read() error. import subprocess p = subprocess.Popen(["aplay -t raw -D plughw:1,0 /dev/zero"], shell=True) subprocess.call(["arecord -Dhw:1,0 --dump-hw-params"], shell=True) subprocess.call(["arecord -Dhw:1,0 -fdat -d1 /dev/null"], shell=True) p.kill() Handling ACP global external interrupt enable register causing this issue. This register got updated wrongly when there is active stream causing interrupts disabled for active stream. Refactored code to handle enabling and disabling external interrupts. Signed-off-by: Vijendar Mukunda Link: https://lore.kernel.org/r/1619555017-29858-1-git-send-email-Vijendar.Mukunda@amd.com Signed-off-by: Mark Brown --- sound/soc/amd/raven/acp3x-pcm-dma.c | 10 ---------- sound/soc/amd/raven/acp3x.h | 1 + sound/soc/amd/raven/pci-acp3x.c | 15 +++++++++++++++ 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/sound/soc/amd/raven/acp3x-pcm-dma.c b/sound/soc/amd/raven/acp3x-pcm-dma.c index 417cda24030cd..2447a1e6e913f 100644 --- a/sound/soc/amd/raven/acp3x-pcm-dma.c +++ b/sound/soc/amd/raven/acp3x-pcm-dma.c @@ -237,10 +237,6 @@ static int acp3x_dma_open(struct snd_soc_component *component, return ret; } - if (!adata->play_stream && !adata->capture_stream && - !adata->i2ssp_play_stream && !adata->i2ssp_capture_stream) - rv_writel(1, adata->acp3x_base + mmACP_EXTERNAL_INTR_ENB); - i2s_data->acp3x_base = adata->acp3x_base; runtime->private_data = i2s_data; return ret; @@ -367,12 +363,6 @@ static int acp3x_dma_close(struct snd_soc_component *component, } } - /* Disable ACP irq, when the current stream is being closed and - * another stream is also not active. - */ - if (!adata->play_stream && !adata->capture_stream && - !adata->i2ssp_play_stream && !adata->i2ssp_capture_stream) - rv_writel(0, adata->acp3x_base + mmACP_EXTERNAL_INTR_ENB); return 0; } diff --git a/sound/soc/amd/raven/acp3x.h b/sound/soc/amd/raven/acp3x.h index 03fe93913e12e..c3f0c8b7545db 100644 --- a/sound/soc/amd/raven/acp3x.h +++ b/sound/soc/amd/raven/acp3x.h @@ -77,6 +77,7 @@ #define ACP_POWER_OFF_IN_PROGRESS 0x03 #define ACP3x_ITER_IRER_SAMP_LEN_MASK 0x38 +#define ACP_EXT_INTR_STAT_CLEAR_MASK 0xFFFFFFFF struct acp3x_platform_info { u16 play_i2s_instance; diff --git a/sound/soc/amd/raven/pci-acp3x.c b/sound/soc/amd/raven/pci-acp3x.c index d3536fd6a1240..a013a607b3d47 100644 --- a/sound/soc/amd/raven/pci-acp3x.c +++ b/sound/soc/amd/raven/pci-acp3x.c @@ -76,6 +76,19 @@ static int acp3x_reset(void __iomem *acp3x_base) return -ETIMEDOUT; } +static void acp3x_enable_interrupts(void __iomem *acp_base) +{ + rv_writel(0x01, acp_base + mmACP_EXTERNAL_INTR_ENB); +} + +static void acp3x_disable_interrupts(void __iomem *acp_base) +{ + rv_writel(ACP_EXT_INTR_STAT_CLEAR_MASK, acp_base + + mmACP_EXTERNAL_INTR_STAT); + rv_writel(0x00, acp_base + mmACP_EXTERNAL_INTR_CNTL); + rv_writel(0x00, acp_base + mmACP_EXTERNAL_INTR_ENB); +} + static int acp3x_init(struct acp3x_dev_data *adata) { void __iomem *acp3x_base = adata->acp3x_base; @@ -93,6 +106,7 @@ static int acp3x_init(struct acp3x_dev_data *adata) pr_err("ACP3x reset failed\n"); return ret; } + acp3x_enable_interrupts(acp3x_base); return 0; } @@ -100,6 +114,7 @@ static int acp3x_deinit(void __iomem *acp3x_base) { int ret; + acp3x_disable_interrupts(acp3x_base); /* Reset */ ret = acp3x_reset(acp3x_base); if (ret) { -- GitLab From c7299fea67696db5bd09d924d1f1080d894f92ef Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Mon, 26 Apr 2021 16:56:38 -0700 Subject: [PATCH 0021/3804] spi: Fix spi device unregister flow When an SPI device is unregistered, the spi->controller->cleanup() is called in the device's release callback. That's wrong for a couple of reasons: 1. spi_dev_put() can be called before spi_add_device() is called. And it's spi_add_device() that calls spi_setup(). This will cause clean() to get called without the spi device ever being setup. 2. There's no guarantee that the controller's driver would be present by the time the spi device's release function gets called. 3. It also causes "sleeping in atomic context" stack dump[1] when device link deletion code does a put_device() on the spi device. Fix these issues by simply moving the cleanup from the device release callback to the actual spi_unregister_device() function. [1] - https://lore.kernel.org/lkml/CAHp75Vc=FCGcUyS0v6fnxme2YJ+qD+Y-hQDQLa2JhWNON9VmsQ@mail.gmail.com/ Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20210426235638.1285530-1-saravanak@google.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index ba425b9c77007..f9885c0965637 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -47,10 +47,6 @@ static void spidev_release(struct device *dev) { struct spi_device *spi = to_spi_device(dev); - /* spi controllers may cleanup for released devices */ - if (spi->controller->cleanup) - spi->controller->cleanup(spi); - spi_controller_put(spi->controller); kfree(spi->driver_override); kfree(spi); @@ -558,6 +554,12 @@ static int spi_dev_check(struct device *dev, void *data) return 0; } +static void spi_cleanup(struct spi_device *spi) +{ + if (spi->controller->cleanup) + spi->controller->cleanup(spi); +} + /** * spi_add_device - Add spi_device allocated with spi_alloc_device * @spi: spi_device to register @@ -622,11 +624,13 @@ int spi_add_device(struct spi_device *spi) /* Device may be bound to an active driver when this returns */ status = device_add(&spi->dev); - if (status < 0) + if (status < 0) { dev_err(dev, "can't add %s, status %d\n", dev_name(&spi->dev), status); - else + spi_cleanup(spi); + } else { dev_dbg(dev, "registered child %s\n", dev_name(&spi->dev)); + } done: mutex_unlock(&spi_add_lock); @@ -710,6 +714,8 @@ void spi_unregister_device(struct spi_device *spi) if (!spi) return; + spi_cleanup(spi); + if (spi->dev.of_node) { of_node_clear_flag(spi->dev.of_node, OF_POPULATED); of_node_put(spi->dev.of_node); -- GitLab From 41f48a29ebd5ce944e412f491f1876b5abeff1d6 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 27 Apr 2021 16:38:42 +0200 Subject: [PATCH 0022/3804] spi: altera: Make SPI_ALTERA_CORE invisible The SPI_ALTERA_CORE config symbol controls compilation of the Altera SPI Controller core code. It is already selected by all of its users, so there is no reason to make it visible, unless compile-testing. Fixes: b0c3d9354de1f87e ("spi: altera: separate core code from platform code") Signed-off-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/f0cb8e66baba4506db6f42fca74dc51b76883507.1619534253.git.geert+renesas@glider.be Signed-off-by: Mark Brown --- drivers/spi/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/Kconfig b/drivers/spi/Kconfig index 8b161ec4943bf..f4481fe48bf06 100644 --- a/drivers/spi/Kconfig +++ b/drivers/spi/Kconfig @@ -65,7 +65,7 @@ config SPI_ALTERA This is the driver for the Altera SPI Controller. config SPI_ALTERA_CORE - tristate "Altera SPI Controller core code" + tristate "Altera SPI Controller core code" if COMPILE_TEST select REGMAP help "The core code for the Altera SPI Controller" -- GitLab From adbd914dcde0b03bfc08ffe40b81f31b0457833f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 21 Apr 2021 14:31:50 +0100 Subject: [PATCH 0023/3804] btrfs: zoned: fix silent data loss after failure splitting ordered extent On a zoned filesystem, sometimes we need to split an ordered extent into 3 different ordered extents. The original ordered extent is shortened, at the front and at the rear, and we create two other new ordered extents to represent the trimmed parts of the original ordered extent. After adjusting the original ordered extent, we create an ordered extent to represent the pre-range, and that may fail with ENOMEM for example. After that we always try to create the ordered extent for the post-range, and if that happens to succeed we end up returning success to the caller as we overwrite the 'ret' variable which contained the previous error. This means we end up with a file range for which there is no ordered extent, which results in the range never getting a new file extent item pointing to the new data location. And since the split operation did not return an error, writeback does not fail and the inode's mapping is not flagged with an error, resulting in a subsequent fsync not reporting an error either. It's possibly very unlikely to have the creation of the post-range ordered extent succeed after the creation of the pre-range ordered extent failed, but it's not impossible. So fix this by making sure we only create the post-range ordered extent if there was no error creating the ordered extent for the pre-range. Fixes: d22002fd37bd97 ("btrfs: zoned: split ordered extent when bio is sent") CC: stable@vger.kernel.org # 5.12+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/ordered-data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 07b0b42187913..6c413bb451a3d 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -984,7 +984,7 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre, if (pre) ret = clone_ordered_extent(ordered, 0, pre); - if (post) + if (ret == 0 && post) ret = clone_ordered_extent(ordered, pre + ordered->disk_num_bytes, post); -- GitLab From ffb7c2e923cb3232454a513dcb5636e73091aa88 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 22 Apr 2021 12:09:21 +0100 Subject: [PATCH 0024/3804] btrfs: do not consider send context as valid when trying to flush qgroups At qgroup.c:try_flush_qgroup() we are asserting that current->journal_info is either NULL or has the value BTRFS_SEND_TRANS_STUB. However allowing for BTRFS_SEND_TRANS_STUB makes no sense because: 1) It is misleading, because send operations are read-only and do not ever need to reserve qgroup space; 2) We already assert that current->journal_info != BTRFS_SEND_TRANS_STUB at transaction.c:start_transaction(); 3) On a kernel without CONFIG_BTRFS_ASSERT=y set, it would result in a crash if try_flush_qgroup() is ever called in a send context, because at transaction.c:start_transaction we cast current->journal_info into a struct btrfs_trans_handle pointer and then dereference it. So just do allow a send context at try_flush_qgroup() and update the comment about it. Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 2319c923c9e69..b1caf5acf1e2c 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3545,11 +3545,15 @@ static int try_flush_qgroup(struct btrfs_root *root) struct btrfs_trans_handle *trans; int ret; - /* Can't hold an open transaction or we run the risk of deadlocking */ - ASSERT(current->journal_info == NULL || - current->journal_info == BTRFS_SEND_TRANS_STUB); - if (WARN_ON(current->journal_info && - current->journal_info != BTRFS_SEND_TRANS_STUB)) + /* + * Can't hold an open transaction or we run the risk of deadlocking, + * and can't either be under the context of a send operation (where + * current->journal_info is set to BTRFS_SEND_TRANS_STUB), as that + * would result in a crash when starting a transaction and does not + * make sense either (send is a read-only operation). + */ + ASSERT(current->journal_info == NULL); + if (WARN_ON(current->journal_info)) return 0; /* -- GitLab From 626e9f41f7c281ba3e02843702f68471706aa6d9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 27 Apr 2021 11:27:20 +0100 Subject: [PATCH 0025/3804] btrfs: fix race leading to unpersisted data and metadata on fsync When doing a fast fsync on a file, there is a race which can result in the fsync returning success to user space without logging the inode and without durably persisting new data. The following example shows one possible scenario for this: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ touch /mnt/bar $ xfs_io -f -c "pwrite -S 0xab 0 1M" -c "fsync" /mnt/baz # Now we have: # file bar == inode 257 # file baz == inode 258 $ mv /mnt/baz /mnt/foo # Now we have: # file bar == inode 257 # file foo == inode 258 $ xfs_io -c "pwrite -S 0xcd 0 1M" /mnt/foo # fsync bar before foo, it is important to trigger the race. $ xfs_io -c "fsync" /mnt/bar $ xfs_io -c "fsync" /mnt/foo # After this: # inode 257, file bar, is empty # inode 258, file foo, has 1M filled with 0xcd # Replay the log: $ mount /dev/sdc /mnt # After this point file foo should have 1M filled with 0xcd and not 0xab The following steps explain how the race happens: 1) Before the first fsync of inode 258, when it has the "baz" name, its ->logged_trans is 0, ->last_sub_trans is 0 and ->last_log_commit is -1. The inode also has the full sync flag set; 2) After the first fsync, we set inode 258 ->logged_trans to 6, which is the generation of the current transaction, and set ->last_log_commit to 0, which is the current value of ->last_sub_trans (done at btrfs_log_inode()). The full sync flag is cleared from the inode during the fsync. The log sub transaction that was committed had an ID of 0 and when we synced the log, at btrfs_sync_log(), we incremented root->log_transid from 0 to 1; 3) During the rename: We update inode 258, through btrfs_update_inode(), and that causes its ->last_sub_trans to be set to 1 (the current log transaction ID), and ->last_log_commit remains with a value of 0. After updating inode 258, because we have previously logged the inode in the previous fsync, we log again the inode through the call to btrfs_log_new_name(). This results in updating the inode's ->last_log_commit from 0 to 1 (the current value of its ->last_sub_trans). The ->last_sub_trans of inode 257 is updated to 1, which is the ID of the next log transaction; 4) Then a buffered write against inode 258 is made. This leaves the value of ->last_sub_trans as 1 (the ID of the current log transaction, stored at root->log_transid); 5) Then an fsync against inode 257 (or any other inode other than 258), happens. This results in committing the log transaction with ID 1, which results in updating root->last_log_commit to 1 and bumping root->log_transid from 1 to 2; 6) Then an fsync against inode 258 starts. We flush delalloc and wait only for writeback to complete, since the full sync flag is not set in the inode's runtime flags - we do not wait for ordered extents to complete. Then, at btrfs_sync_file(), we call btrfs_inode_in_log() before the ordered extent completes. The call returns true: static inline bool btrfs_inode_in_log(...) { bool ret = false; spin_lock(&inode->lock); if (inode->logged_trans == generation && inode->last_sub_trans <= inode->last_log_commit && inode->last_sub_trans <= inode->root->last_log_commit) ret = true; spin_unlock(&inode->lock); return ret; } generation has a value of 6 (fs_info->generation), ->logged_trans also has a value of 6 (set when we logged the inode during the first fsync and when logging it during the rename), ->last_sub_trans has a value of 1, set during the rename (step 3), ->last_log_commit also has a value of 1 (set in step 3) and root->last_log_commit has a value of 1, which was set in step 5 when fsyncing inode 257. As a consequence we don't log the inode, any new extents and do not sync the log, resulting in a data loss if a power failure happens after the fsync and before the current transaction commits. Also, because we do not log the inode, after a power failure the mtime and ctime of the inode do not match those we had before. When the ordered extent completes before we call btrfs_inode_in_log(), then the call returns false and we log the inode and sync the log, since at the end of ordered extent completion we update the inode and set ->last_sub_trans to 2 (the value of root->log_transid) and ->last_log_commit to 1. This problem is found after removing the check for the emptiness of the inode's list of modified extents in the recent commit 209ecbb8585bf6 ("btrfs: remove stale comment and logic from btrfs_inode_in_log()"), added in the 5.13 merge window. However checking the emptiness of the list is not really the way to solve this problem, and was never intended to, because while that solves the problem for COW writes, the problem persists for NOCOW writes because in that case the list is always empty. In the case of NOCOW writes, even though we wait for the writeback to complete before returning from btrfs_sync_file(), we end up not logging the inode, which has a new mtime/ctime, and because we don't sync the log, we never issue disk barriers (send REQ_PREFLUSH to the device) since that only happens when we sync the log (when we write super blocks at btrfs_sync_log()). So effectively, for a NOCOW case, when we return from btrfs_sync_file() to user space, we are not guaranteeing that the data is durably persisted on disk. Also, while the example above uses a rename exchange to show how the problem happens, it is not the only way to trigger it. An alternative could be adding a new hard link to inode 258, since that also results in calling btrfs_log_new_name() and updating the inode in the log. An example reproducer using the addition of a hard link instead of a rename operation: $ mkfs.btrfs -f /dev/sdc $ mount /dev/sdc /mnt $ touch /mnt/bar $ xfs_io -f -c "pwrite -S 0xab 0 1M" -c "fsync" /mnt/foo $ ln /mnt/foo /mnt/foo_link $ xfs_io -c "pwrite -S 0xcd 0 1M" /mnt/foo $ xfs_io -c "fsync" /mnt/bar $ xfs_io -c "fsync" /mnt/foo # Replay the log: $ mount /dev/sdc /mnt # After this point file foo often has 1M filled with 0xab and not 0xcd The reasons leading to the final fsync of file foo, inode 258, not persisting the new data are the same as for the previous example with a rename operation. So fix by never skipping logging and log syncing when there are still any ordered extents in flight. To avoid making the conditional if statement that checks if logging an inode is needed harder to read, place all the logic into an helper function with separate if statements to make it more manageable and easier to read. A test case for fstests will follow soon. For NOCOW writes, the problem existed before commit b5e6c3e170b770 ("btrfs: always wait on ordered extents at fsync time"), introduced in kernel 4.19, then it went away with that commit since we started to always wait for ordered extent completion before logging. The problem came back again once the fast fsync path was changed again to avoid waiting for ordered extent completion, in commit 487781796d3022 ("btrfs: make fast fsyncs wait only for writeback"), added in kernel 5.10. However, for COW writes, the race only happens after the recent commit 209ecbb8585bf6 ("btrfs: remove stale comment and logic from btrfs_inode_in_log()"), introduced in the 5.13 merge window. For NOCOW writes, the bug existed before that commit. So tag 5.10+ as the release for stable backports. CC: stable@vger.kernel.org # 5.10+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/file.c | 35 +++++++++++++++++++++++++---------- fs/btrfs/tree-log.c | 3 ++- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 864c08d08a353..3b10d98b4ebb3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2067,6 +2067,30 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end) return ret; } +static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) +{ + struct btrfs_inode *inode = BTRFS_I(ctx->inode); + struct btrfs_fs_info *fs_info = inode->root->fs_info; + + if (btrfs_inode_in_log(inode, fs_info->generation) && + list_empty(&ctx->ordered_extents)) + return true; + + /* + * If we are doing a fast fsync we can not bail out if the inode's + * last_trans is <= then the last committed transaction, because we only + * update the last_trans of the inode during ordered extent completion, + * and for a fast fsync we don't wait for that, we only wait for the + * writeback to complete. + */ + if (inode->last_trans <= fs_info->last_trans_committed && + (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) || + list_empty(&ctx->ordered_extents))) + return true; + + return false; +} + /* * fsync call for both files and directories. This logs the inode into * the tree log instead of forcing full commits whenever possible. @@ -2185,17 +2209,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) atomic_inc(&root->log_batch); - /* - * If we are doing a fast fsync we can not bail out if the inode's - * last_trans is <= then the last committed transaction, because we only - * update the last_trans of the inode during ordered extent completion, - * and for a fast fsync we don't wait for that, we only wait for the - * writeback to complete. - */ smp_mb(); - if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) || - (BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed && - (full_sync || list_empty(&ctx.ordered_extents)))) { + if (skip_inode_logging(&ctx)) { /* * We've had everything committed since the last time we were * modified so clear this flag in case it was set for whatever diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c1353b84ae54c..a0fc3a1390ab3 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -6060,7 +6060,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, * (since logging them is pointless, a link count of 0 means they * will never be accessible). */ - if (btrfs_inode_in_log(inode, trans->transid) || + if ((btrfs_inode_in_log(inode, trans->transid) && + list_empty(&ctx->ordered_extents)) || inode->vfs_inode.i_nlink == 0) { ret = BTRFS_NO_LOG_SYNC; goto end_no_trans; -- GitLab From f9baa501b4fd6962257853d46ddffbc21f27e344 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 22 Apr 2021 12:08:05 +0100 Subject: [PATCH 0026/3804] btrfs: fix deadlock when cloning inline extents and using qgroups There are a few exceptional cases where cloning an inline extent needs to copy the inline extent data into a page of the destination inode. When this happens, we end up starting a transaction while having a dirty page for the destination inode and while having the range locked in the destination's inode iotree too. Because when reserving metadata space for a transaction we may need to flush existing delalloc in case there is not enough free space, we have a mechanism in place to prevent a deadlock, which was introduced in commit 3d45f221ce627d ("btrfs: fix deadlock when cloning inline extent and low on free metadata space"). However when using qgroups, a transaction also reserves metadata qgroup space, which can also result in flushing delalloc in case there is not enough available space at the moment. When this happens we deadlock, since flushing delalloc requires locking the file range in the inode's iotree and the range was already locked at the very beginning of the clone operation, before attempting to start the transaction. When this issue happens, stack traces like the following are reported: [72747.556262] task:kworker/u81:9 state:D stack: 0 pid: 225 ppid: 2 flags:0x00004000 [72747.556268] Workqueue: writeback wb_workfn (flush-btrfs-1142) [72747.556271] Call Trace: [72747.556273] __schedule+0x296/0x760 [72747.556277] schedule+0x3c/0xa0 [72747.556279] io_schedule+0x12/0x40 [72747.556284] __lock_page+0x13c/0x280 [72747.556287] ? generic_file_readonly_mmap+0x70/0x70 [72747.556325] extent_write_cache_pages+0x22a/0x440 [btrfs] [72747.556331] ? __set_page_dirty_nobuffers+0xe7/0x160 [72747.556358] ? set_extent_buffer_dirty+0x5e/0x80 [btrfs] [72747.556362] ? update_group_capacity+0x25/0x210 [72747.556366] ? cpumask_next_and+0x1a/0x20 [72747.556391] extent_writepages+0x44/0xa0 [btrfs] [72747.556394] do_writepages+0x41/0xd0 [72747.556398] __writeback_single_inode+0x39/0x2a0 [72747.556403] writeback_sb_inodes+0x1ea/0x440 [72747.556407] __writeback_inodes_wb+0x5f/0xc0 [72747.556410] wb_writeback+0x235/0x2b0 [72747.556414] ? get_nr_inodes+0x35/0x50 [72747.556417] wb_workfn+0x354/0x490 [72747.556420] ? newidle_balance+0x2c5/0x3e0 [72747.556424] process_one_work+0x1aa/0x340 [72747.556426] worker_thread+0x30/0x390 [72747.556429] ? create_worker+0x1a0/0x1a0 [72747.556432] kthread+0x116/0x130 [72747.556435] ? kthread_park+0x80/0x80 [72747.556438] ret_from_fork+0x1f/0x30 [72747.566958] Workqueue: btrfs-flush_delalloc btrfs_work_helper [btrfs] [72747.566961] Call Trace: [72747.566964] __schedule+0x296/0x760 [72747.566968] ? finish_wait+0x80/0x80 [72747.566970] schedule+0x3c/0xa0 [72747.566995] wait_extent_bit.constprop.68+0x13b/0x1c0 [btrfs] [72747.566999] ? finish_wait+0x80/0x80 [72747.567024] lock_extent_bits+0x37/0x90 [btrfs] [72747.567047] btrfs_invalidatepage+0x299/0x2c0 [btrfs] [72747.567051] ? find_get_pages_range_tag+0x2cd/0x380 [72747.567076] __extent_writepage+0x203/0x320 [btrfs] [72747.567102] extent_write_cache_pages+0x2bb/0x440 [btrfs] [72747.567106] ? update_load_avg+0x7e/0x5f0 [72747.567109] ? enqueue_entity+0xf4/0x6f0 [72747.567134] extent_writepages+0x44/0xa0 [btrfs] [72747.567137] ? enqueue_task_fair+0x93/0x6f0 [72747.567140] do_writepages+0x41/0xd0 [72747.567144] __filemap_fdatawrite_range+0xc7/0x100 [72747.567167] btrfs_run_delalloc_work+0x17/0x40 [btrfs] [72747.567195] btrfs_work_helper+0xc2/0x300 [btrfs] [72747.567200] process_one_work+0x1aa/0x340 [72747.567202] worker_thread+0x30/0x390 [72747.567205] ? create_worker+0x1a0/0x1a0 [72747.567208] kthread+0x116/0x130 [72747.567211] ? kthread_park+0x80/0x80 [72747.567214] ret_from_fork+0x1f/0x30 [72747.569686] task:fsstress state:D stack: 0 pid:841421 ppid:841417 flags:0x00000000 [72747.569689] Call Trace: [72747.569691] __schedule+0x296/0x760 [72747.569694] schedule+0x3c/0xa0 [72747.569721] try_flush_qgroup+0x95/0x140 [btrfs] [72747.569725] ? finish_wait+0x80/0x80 [72747.569753] btrfs_qgroup_reserve_data+0x34/0x50 [btrfs] [72747.569781] btrfs_check_data_free_space+0x5f/0xa0 [btrfs] [72747.569804] btrfs_buffered_write+0x1f7/0x7f0 [btrfs] [72747.569810] ? path_lookupat.isra.48+0x97/0x140 [72747.569833] btrfs_file_write_iter+0x81/0x410 [btrfs] [72747.569836] ? __kmalloc+0x16a/0x2c0 [72747.569839] do_iter_readv_writev+0x160/0x1c0 [72747.569843] do_iter_write+0x80/0x1b0 [72747.569847] vfs_writev+0x84/0x140 [72747.569869] ? btrfs_file_llseek+0x38/0x270 [btrfs] [72747.569873] do_writev+0x65/0x100 [72747.569876] do_syscall_64+0x33/0x40 [72747.569879] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [72747.569899] task:fsstress state:D stack: 0 pid:841424 ppid:841417 flags:0x00004000 [72747.569903] Call Trace: [72747.569906] __schedule+0x296/0x760 [72747.569909] schedule+0x3c/0xa0 [72747.569936] try_flush_qgroup+0x95/0x140 [btrfs] [72747.569940] ? finish_wait+0x80/0x80 [72747.569967] __btrfs_qgroup_reserve_meta+0x36/0x50 [btrfs] [72747.569989] start_transaction+0x279/0x580 [btrfs] [72747.570014] clone_copy_inline_extent+0x332/0x490 [btrfs] [72747.570041] btrfs_clone+0x5b7/0x7a0 [btrfs] [72747.570068] ? lock_extent_bits+0x64/0x90 [btrfs] [72747.570095] btrfs_clone_files+0xfc/0x150 [btrfs] [72747.570122] btrfs_remap_file_range+0x3d8/0x4a0 [btrfs] [72747.570126] do_clone_file_range+0xed/0x200 [72747.570131] vfs_clone_file_range+0x37/0x110 [72747.570134] ioctl_file_clone+0x7d/0xb0 [72747.570137] do_vfs_ioctl+0x138/0x630 [72747.570140] __x64_sys_ioctl+0x62/0xc0 [72747.570143] do_syscall_64+0x33/0x40 [72747.570146] entry_SYSCALL_64_after_hwframe+0x44/0xa9 So fix this by skipping the flush of delalloc for an inode that is flagged with BTRFS_INODE_NO_DELALLOC_FLUSH, meaning it is currently under such a special case of cloning an inline extent, when flushing delalloc during qgroup metadata reservation. The special cases for cloning inline extents were added in kernel 5.7 by by commit 05a5a7621ce66c ("Btrfs: implement full reflink support for inline extents"), while having qgroup metadata space reservation flushing delalloc when low on space was added in kernel 5.9 by commit c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get -EDQUOT"). So use a "Fixes:" tag for the later commit to ease stable kernel backports. Reported-by: Wang Yugui Link: https://lore.kernel.org/linux-btrfs/20210421083137.31E3.409509F4@e16-tech.com/ Fixes: c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get -EDQUOT") CC: stable@vger.kernel.org # 5.9+ Reviewed-by: Qu Wenruo Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ctree.h | 2 +- fs/btrfs/inode.c | 4 ++-- fs/btrfs/ioctl.c | 2 +- fs/btrfs/qgroup.c | 2 +- fs/btrfs/send.c | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 278e0cbc9a98b..0f5b0b12762bb 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3127,7 +3127,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, struct btrfs_inode *inode, u64 new_size, u32 min_type); -int btrfs_start_delalloc_snapshot(struct btrfs_root *root); +int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context); int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr, bool in_reclaim_context); int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1a349759efae4..69fcdf8f0b1c2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9691,7 +9691,7 @@ out: return ret; } -int btrfs_start_delalloc_snapshot(struct btrfs_root *root) +int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context) { struct writeback_control wbc = { .nr_to_write = LONG_MAX, @@ -9704,7 +9704,7 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root) if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) return -EROFS; - return start_delalloc_inodes(root, &wbc, true, false); + return start_delalloc_inodes(root, &wbc, true, in_reclaim_context); } int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b1328f17607ea..0ba0e4ddaf6b4 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1051,7 +1051,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent, */ btrfs_drew_read_lock(&root->snapshot_lock); - ret = btrfs_start_delalloc_snapshot(root); + ret = btrfs_start_delalloc_snapshot(root, false); if (ret) goto out; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b1caf5acf1e2c..3ded812f522cc 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3566,7 +3566,7 @@ static int try_flush_qgroup(struct btrfs_root *root) return 0; } - ret = btrfs_start_delalloc_snapshot(root); + ret = btrfs_start_delalloc_snapshot(root, true); if (ret < 0) goto out; btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 55741adf90712..bd69db72acc5e 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -7170,7 +7170,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx) int i; if (root) { - ret = btrfs_start_delalloc_snapshot(root); + ret = btrfs_start_delalloc_snapshot(root, false); if (ret) return ret; btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); @@ -7178,7 +7178,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx) for (i = 0; i < sctx->clone_roots_cnt; i++) { root = sctx->clone_roots[i].root; - ret = btrfs_start_delalloc_snapshot(root); + ret = btrfs_start_delalloc_snapshot(root, false); if (ret) return ret; btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); -- GitLab From 02ded1314a465a89267be38231d9858206853d80 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Fri, 23 Apr 2021 15:04:20 -0400 Subject: [PATCH 0027/3804] drm/msm: fix minor version to indicate MSM_PARAM_SUSPENDS support Increase the minor version to indicate that MSM_PARAM_SUSPENDS is supported. Fixes: 3ab1c5cc3939 ("drm/msm: Add param for userspace to query suspend count") Signed-off-by: Jonathan Marek Link: https://lore.kernel.org/r/20210423190420.25217-1-jonathan@marek.ca Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/msm_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index e1104d2454e2e..fe7d17cd35ecd 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -42,7 +42,7 @@ * - 1.7.0 - Add MSM_PARAM_SUSPENDS to access suspend count */ #define MSM_VERSION_MAJOR 1 -#define MSM_VERSION_MINOR 6 +#define MSM_VERSION_MINOR 7 #define MSM_VERSION_PATCHLEVEL 0 static const struct drm_mode_config_funcs mode_config_funcs = { -- GitLab From 121271f08809e5dc01d15d3e529988ac5d740af6 Mon Sep 17 00:00:00 2001 From: Amit Kumar Mahapatra Date: Wed, 28 Apr 2021 23:38:01 -0600 Subject: [PATCH 0028/3804] spi: spi-zynq-qspi: Fix kernel-doc warning Fix kernel-doc warning. Signed-off-by: Amit Kumar Mahapatra Link: https://lore.kernel.org/r/20210429053802.17650-2-amit.kumar-mahapatra@xilinx.com Signed-off-by: Mark Brown --- drivers/spi/spi-zynq-qspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c index 5d8a5ee62fa23..1acde9e249731 100644 --- a/drivers/spi/spi-zynq-qspi.c +++ b/drivers/spi/spi-zynq-qspi.c @@ -367,7 +367,7 @@ static int zynq_qspi_config_op(struct zynq_qspi *xqspi, struct spi_device *spi) } /** - * zynq_qspi_setup - Configure the QSPI controller + * zynq_qspi_setup_op - Configure the QSPI controller * @spi: Pointer to the spi_device structure * * Sets the operational mode of QSPI controller for the next QSPI transfer, baud -- GitLab From 6d5ff8e632a4f2389c331e5554cd1c2a9a28c7aa Mon Sep 17 00:00:00 2001 From: Karen Dombroski Date: Wed, 28 Apr 2021 23:38:02 -0600 Subject: [PATCH 0029/3804] spi: spi-zynq-qspi: Fix stack violation bug When the number of bytes for the op is greater than one, the read could run off the end of the function stack and cause a crash. This patch restores the behaviour of safely reading out of the original opcode location. Signed-off-by: Karen Dombroski Signed-off-by: Amit Kumar Mahapatra Link: https://lore.kernel.org/r/20210429053802.17650-3-amit.kumar-mahapatra@xilinx.com Signed-off-by: Mark Brown --- drivers/spi/spi-zynq-qspi.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c index 1acde9e249731..5a3d81c31d040 100644 --- a/drivers/spi/spi-zynq-qspi.c +++ b/drivers/spi/spi-zynq-qspi.c @@ -528,18 +528,17 @@ static int zynq_qspi_exec_mem_op(struct spi_mem *mem, struct zynq_qspi *xqspi = spi_controller_get_devdata(mem->spi->master); int err = 0, i; u8 *tmpbuf; - u8 opcode = op->cmd.opcode; dev_dbg(xqspi->dev, "cmd:%#x mode:%d.%d.%d.%d\n", - opcode, op->cmd.buswidth, op->addr.buswidth, + op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth, op->dummy.buswidth, op->data.buswidth); zynq_qspi_chipselect(mem->spi, true); zynq_qspi_config_op(xqspi, mem->spi); - if (op->cmd.nbytes) { + if (op->cmd.opcode) { reinit_completion(&xqspi->data_completion); - xqspi->txbuf = &opcode; + xqspi->txbuf = (u8 *)&op->cmd.opcode; xqspi->rxbuf = NULL; xqspi->tx_bytes = op->cmd.nbytes; xqspi->rx_bytes = op->cmd.nbytes; -- GitLab From f9c82a4ea89c384d49ce03768ba88d049ed3f1f0 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Thu, 22 Apr 2021 14:27:08 +0200 Subject: [PATCH 0030/3804] Increase size of ucounts to atomic_long_t RLIMIT_MSGQUEUE and RLIMIT_MEMLOCK use unsigned long to store their counters. As a preparation for moving rlimits based on ucounts, we need to increase the size of the variable to long. Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/257aa5fb1a7d81cf0f4c34f39ada2320c4284771.1619094428.git.legion@kernel.org Signed-off-by: Eric W. Biederman --- include/linux/user_namespace.h | 4 ++-- kernel/ucount.c | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index f6c5f784be5ab..c242c10906c50 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -88,7 +88,7 @@ struct user_namespace { struct ctl_table_header *sysctls; #endif struct ucounts *ucounts; - int ucount_max[UCOUNT_COUNTS]; + long ucount_max[UCOUNT_COUNTS]; } __randomize_layout; struct ucounts { @@ -96,7 +96,7 @@ struct ucounts { struct user_namespace *ns; kuid_t uid; int count; - atomic_t ucount[UCOUNT_COUNTS]; + atomic_long_t ucount[UCOUNT_COUNTS]; }; extern struct user_namespace init_user_ns; diff --git a/kernel/ucount.c b/kernel/ucount.c index 11b1596e2542a..04c561751af1e 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -175,14 +175,14 @@ static void put_ucounts(struct ucounts *ucounts) kfree(ucounts); } -static inline bool atomic_inc_below(atomic_t *v, int u) +static inline bool atomic_long_inc_below(atomic_long_t *v, int u) { - int c, old; - c = atomic_read(v); + long c, old; + c = atomic_long_read(v); for (;;) { if (unlikely(c >= u)) return false; - old = atomic_cmpxchg(v, c, c+1); + old = atomic_long_cmpxchg(v, c, c+1); if (likely(old == c)) return true; c = old; @@ -196,17 +196,17 @@ struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, struct user_namespace *tns; ucounts = get_ucounts(ns, uid); for (iter = ucounts; iter; iter = tns->ucounts) { - int max; + long max; tns = iter->ns; max = READ_ONCE(tns->ucount_max[type]); - if (!atomic_inc_below(&iter->ucount[type], max)) + if (!atomic_long_inc_below(&iter->ucount[type], max)) goto fail; } return ucounts; fail: bad = iter; for (iter = ucounts; iter != bad; iter = iter->ns->ucounts) - atomic_dec(&iter->ucount[type]); + atomic_long_dec(&iter->ucount[type]); put_ucounts(ucounts); return NULL; @@ -216,7 +216,7 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type) { struct ucounts *iter; for (iter = ucounts; iter; iter = iter->ns->ucounts) { - int dec = atomic_dec_if_positive(&iter->ucount[type]); + long dec = atomic_long_dec_if_positive(&iter->ucount[type]); WARN_ON_ONCE(dec < 0); } put_ucounts(ucounts); -- GitLab From 905ae01c4ae2ae3df05bb141801b1db4b7d83c61 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Thu, 22 Apr 2021 14:27:09 +0200 Subject: [PATCH 0031/3804] Add a reference to ucounts for each cred For RLIMIT_NPROC and some other rlimits the user_struct that holds the global limit is kept alive for the lifetime of a process by keeping it in struct cred. Adding a pointer to ucounts in the struct cred will allow to track RLIMIT_NPROC not only for user in the system, but for user in the user_namespace. Updating ucounts may require memory allocation which may fail. So, we cannot change cred.ucounts in the commit_creds() because this function cannot fail and it should always return 0. For this reason, we modify cred.ucounts before calling the commit_creds(). Changelog v6: * Fix null-ptr-deref in is_ucounts_overlimit() detected by trinity. This error was caused by the fact that cred_alloc_blank() left the ucounts pointer empty. Reported-by: kernel test robot Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/b37aaef28d8b9b0d757e07ba6dd27281bbe39259.1619094428.git.legion@kernel.org Signed-off-by: Eric W. Biederman --- fs/exec.c | 4 ++++ include/linux/cred.h | 2 ++ include/linux/user_namespace.h | 4 ++++ kernel/cred.c | 40 ++++++++++++++++++++++++++++++++++ kernel/fork.c | 6 +++++ kernel/sys.c | 12 ++++++++++ kernel/ucount.c | 40 +++++++++++++++++++++++++++++++--- kernel/user_namespace.c | 3 +++ 8 files changed, 108 insertions(+), 3 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 18594f11c31fe..d7c4187ca023e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1360,6 +1360,10 @@ int begin_new_exec(struct linux_binprm * bprm) WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1); flush_signal_handlers(me, 0); + retval = set_cred_ucounts(bprm->cred); + if (retval < 0) + goto out_unlock; + /* * install the new credentials for this executable */ diff --git a/include/linux/cred.h b/include/linux/cred.h index 4c63505036977..66436e6550328 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -144,6 +144,7 @@ struct cred { #endif struct user_struct *user; /* real user ID subscription */ struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */ + struct ucounts *ucounts; struct group_info *group_info; /* supplementary groups for euid/fsgid */ /* RCU deletion */ union { @@ -170,6 +171,7 @@ extern int set_security_override_from_ctx(struct cred *, const char *); extern int set_create_files_as(struct cred *, struct inode *); extern int cred_fscmp(const struct cred *, const struct cred *); extern void __init cred_init(void); +extern int set_cred_ucounts(struct cred *); /* * check for validity of credentials diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index c242c10906c50..7919b80d57ed0 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -100,11 +100,15 @@ struct ucounts { }; extern struct user_namespace init_user_ns; +extern struct ucounts init_ucounts; bool setup_userns_sysctls(struct user_namespace *ns); void retire_userns_sysctls(struct user_namespace *ns); struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); void dec_ucount(struct ucounts *ucounts, enum ucount_type type); +struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); +struct ucounts *get_ucounts(struct ucounts *ucounts); +void put_ucounts(struct ucounts *ucounts); #ifdef CONFIG_USER_NS diff --git a/kernel/cred.c b/kernel/cred.c index 421b1149c6516..58a8a9e24347d 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -60,6 +60,7 @@ struct cred init_cred = { .user = INIT_USER, .user_ns = &init_user_ns, .group_info = &init_groups, + .ucounts = &init_ucounts, }; static inline void set_cred_subscribers(struct cred *cred, int n) @@ -119,6 +120,8 @@ static void put_cred_rcu(struct rcu_head *rcu) if (cred->group_info) put_group_info(cred->group_info); free_uid(cred->user); + if (cred->ucounts) + put_ucounts(cred->ucounts); put_user_ns(cred->user_ns); kmem_cache_free(cred_jar, cred); } @@ -222,6 +225,7 @@ struct cred *cred_alloc_blank(void) #ifdef CONFIG_DEBUG_CREDENTIALS new->magic = CRED_MAGIC; #endif + new->ucounts = get_ucounts(&init_ucounts); if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) goto error; @@ -284,6 +288,11 @@ struct cred *prepare_creds(void) if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) goto error; + + new->ucounts = get_ucounts(new->ucounts); + if (!new->ucounts) + goto error; + validate_creds(new); return new; @@ -363,6 +372,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) ret = create_user_ns(new); if (ret < 0) goto error_put; + if (set_cred_ucounts(new) < 0) + goto error_put; } #ifdef CONFIG_KEYS @@ -653,6 +664,31 @@ int cred_fscmp(const struct cred *a, const struct cred *b) } EXPORT_SYMBOL(cred_fscmp); +int set_cred_ucounts(struct cred *new) +{ + struct task_struct *task = current; + const struct cred *old = task->real_cred; + struct ucounts *old_ucounts = new->ucounts; + + if (new->user == old->user && new->user_ns == old->user_ns) + return 0; + + /* + * This optimization is needed because alloc_ucounts() uses locks + * for table lookups. + */ + if (old_ucounts && old_ucounts->ns == new->user_ns && uid_eq(old_ucounts->uid, new->euid)) + return 0; + + if (!(new->ucounts = alloc_ucounts(new->user_ns, new->euid))) + return -EAGAIN; + + if (old_ucounts) + put_ucounts(old_ucounts); + + return 0; +} + /* * initialise the credentials stuff */ @@ -719,6 +755,10 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) goto error; + new->ucounts = get_ucounts(new->ucounts); + if (!new->ucounts) + goto error; + put_cred(old); validate_creds(new); return new; diff --git a/kernel/fork.c b/kernel/fork.c index 426cd0c51f9eb..321a5e31d817e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2995,6 +2995,12 @@ int ksys_unshare(unsigned long unshare_flags) if (err) goto bad_unshare_cleanup_cred; + if (new_cred) { + err = set_cred_ucounts(new_cred); + if (err) + goto bad_unshare_cleanup_cred; + } + if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) { if (do_sysvsem) { /* diff --git a/kernel/sys.c b/kernel/sys.c index 2e2e3f378d97f..cabfc5b861754 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -552,6 +552,10 @@ long __sys_setreuid(uid_t ruid, uid_t euid) if (retval < 0) goto error; + retval = set_cred_ucounts(new); + if (retval < 0) + goto error; + return commit_creds(new); error: @@ -610,6 +614,10 @@ long __sys_setuid(uid_t uid) if (retval < 0) goto error; + retval = set_cred_ucounts(new); + if (retval < 0) + goto error; + return commit_creds(new); error: @@ -685,6 +693,10 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) if (retval < 0) goto error; + retval = set_cred_ucounts(new); + if (retval < 0) + goto error; + return commit_creds(new); error: diff --git a/kernel/ucount.c b/kernel/ucount.c index 04c561751af1e..50cc1dfb7d28a 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -8,6 +8,12 @@ #include #include +struct ucounts init_ucounts = { + .ns = &init_user_ns, + .uid = GLOBAL_ROOT_UID, + .count = 1, +}; + #define UCOUNTS_HASHTABLE_BITS 10 static struct hlist_head ucounts_hashtable[(1 << UCOUNTS_HASHTABLE_BITS)]; static DEFINE_SPINLOCK(ucounts_lock); @@ -125,7 +131,15 @@ static struct ucounts *find_ucounts(struct user_namespace *ns, kuid_t uid, struc return NULL; } -static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) +static void hlist_add_ucounts(struct ucounts *ucounts) +{ + struct hlist_head *hashent = ucounts_hashentry(ucounts->ns, ucounts->uid); + spin_lock_irq(&ucounts_lock); + hlist_add_head(&ucounts->node, hashent); + spin_unlock_irq(&ucounts_lock); +} + +struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) { struct hlist_head *hashent = ucounts_hashentry(ns, uid); struct ucounts *ucounts, *new; @@ -160,7 +174,26 @@ static struct ucounts *get_ucounts(struct user_namespace *ns, kuid_t uid) return ucounts; } -static void put_ucounts(struct ucounts *ucounts) +struct ucounts *get_ucounts(struct ucounts *ucounts) +{ + unsigned long flags; + + if (!ucounts) + return NULL; + + spin_lock_irqsave(&ucounts_lock, flags); + if (ucounts->count == INT_MAX) { + WARN_ONCE(1, "ucounts: counter has reached its maximum value"); + ucounts = NULL; + } else { + ucounts->count += 1; + } + spin_unlock_irqrestore(&ucounts_lock, flags); + + return ucounts; +} + +void put_ucounts(struct ucounts *ucounts) { unsigned long flags; @@ -194,7 +227,7 @@ struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, { struct ucounts *ucounts, *iter, *bad; struct user_namespace *tns; - ucounts = get_ucounts(ns, uid); + ucounts = alloc_ucounts(ns, uid); for (iter = ucounts; iter; iter = tns->ucounts) { long max; tns = iter->ns; @@ -237,6 +270,7 @@ static __init int user_namespace_sysctl_init(void) BUG_ON(!user_header); BUG_ON(!setup_userns_sysctls(&init_user_ns)); #endif + hlist_add_ucounts(&init_ucounts); return 0; } subsys_initcall(user_namespace_sysctl_init); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 9a4b980d695b8..f1b7b4b8ffa25 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -1340,6 +1340,9 @@ static int userns_install(struct nsset *nsset, struct ns_common *ns) put_user_ns(cred->user_ns); set_cred_user_ns(cred, get_user_ns(user_ns)); + if (set_cred_ucounts(cred) < 0) + return -EINVAL; + return 0; } -- GitLab From b6c336528926ef73b0f70260f2636de2c3b94c14 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Thu, 22 Apr 2021 14:27:10 +0200 Subject: [PATCH 0032/3804] Use atomic_t for ucounts reference counting The current implementation of the ucounts reference counter requires the use of spin_lock. We're going to use get_ucounts() in more performance critical areas like a handling of RLIMIT_SIGPENDING. Now we need to use spin_lock only if we want to change the hashtable. v10: * Always try to put ucounts in case we cannot increase ucounts->count. This will allow to cover the case when all consumers will return ucounts at once. v9: * Use a negative value to check that the ucounts->count is close to overflow. Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/94d1dbecab060a6b116b0a2d1accd8ca1bbb4f5f.1619094428.git.legion@kernel.org Signed-off-by: Eric W. Biederman --- include/linux/user_namespace.h | 4 +-- kernel/ucount.c | 53 ++++++++++++---------------------- 2 files changed, 21 insertions(+), 36 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 7919b80d57ed0..80b5bf12feaeb 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -95,7 +95,7 @@ struct ucounts { struct hlist_node node; struct user_namespace *ns; kuid_t uid; - int count; + atomic_t count; atomic_long_t ucount[UCOUNT_COUNTS]; }; @@ -107,7 +107,7 @@ void retire_userns_sysctls(struct user_namespace *ns); struct ucounts *inc_ucount(struct user_namespace *ns, kuid_t uid, enum ucount_type type); void dec_ucount(struct ucounts *ucounts, enum ucount_type type); struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); -struct ucounts *get_ucounts(struct ucounts *ucounts); +struct ucounts * __must_check get_ucounts(struct ucounts *ucounts); void put_ucounts(struct ucounts *ucounts); #ifdef CONFIG_USER_NS diff --git a/kernel/ucount.c b/kernel/ucount.c index 50cc1dfb7d28a..365865f368ecd 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -11,7 +11,7 @@ struct ucounts init_ucounts = { .ns = &init_user_ns, .uid = GLOBAL_ROOT_UID, - .count = 1, + .count = ATOMIC_INIT(1), }; #define UCOUNTS_HASHTABLE_BITS 10 @@ -139,6 +139,15 @@ static void hlist_add_ucounts(struct ucounts *ucounts) spin_unlock_irq(&ucounts_lock); } +struct ucounts *get_ucounts(struct ucounts *ucounts) +{ + if (ucounts && atomic_add_negative(1, &ucounts->count)) { + put_ucounts(ucounts); + ucounts = NULL; + } + return ucounts; +} + struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) { struct hlist_head *hashent = ucounts_hashentry(ns, uid); @@ -155,7 +164,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) new->ns = ns; new->uid = uid; - new->count = 0; + atomic_set(&new->count, 1); spin_lock_irq(&ucounts_lock); ucounts = find_ucounts(ns, uid, hashent); @@ -163,33 +172,12 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) kfree(new); } else { hlist_add_head(&new->node, hashent); - ucounts = new; + spin_unlock_irq(&ucounts_lock); + return new; } } - if (ucounts->count == INT_MAX) - ucounts = NULL; - else - ucounts->count += 1; spin_unlock_irq(&ucounts_lock); - return ucounts; -} - -struct ucounts *get_ucounts(struct ucounts *ucounts) -{ - unsigned long flags; - - if (!ucounts) - return NULL; - - spin_lock_irqsave(&ucounts_lock, flags); - if (ucounts->count == INT_MAX) { - WARN_ONCE(1, "ucounts: counter has reached its maximum value"); - ucounts = NULL; - } else { - ucounts->count += 1; - } - spin_unlock_irqrestore(&ucounts_lock, flags); - + ucounts = get_ucounts(ucounts); return ucounts; } @@ -197,15 +185,12 @@ void put_ucounts(struct ucounts *ucounts) { unsigned long flags; - spin_lock_irqsave(&ucounts_lock, flags); - ucounts->count -= 1; - if (!ucounts->count) + if (atomic_dec_and_test(&ucounts->count)) { + spin_lock_irqsave(&ucounts_lock, flags); hlist_del_init(&ucounts->node); - else - ucounts = NULL; - spin_unlock_irqrestore(&ucounts_lock, flags); - - kfree(ucounts); + spin_unlock_irqrestore(&ucounts_lock, flags); + kfree(ucounts); + } } static inline bool atomic_long_inc_below(atomic_long_t *v, int u) -- GitLab From 21d1c5e386bc751f1953b371d72cd5b7d9c9e270 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Thu, 22 Apr 2021 14:27:11 +0200 Subject: [PATCH 0033/3804] Reimplement RLIMIT_NPROC on top of ucounts The rlimit counter is tied to uid in the user_namespace. This allows rlimit values to be specified in userns even if they are already globally exceeded by the user. However, the value of the previous user_namespaces cannot be exceeded. To illustrate the impact of rlimits, let's say there is a program that does not fork. Some service-A wants to run this program as user X in multiple containers. Since the program never fork the service wants to set RLIMIT_NPROC=1. service-A \- program (uid=1000, container1, rlimit_nproc=1) \- program (uid=1000, container2, rlimit_nproc=1) The service-A sets RLIMIT_NPROC=1 and runs the program in container1. When the service-A tries to run a program with RLIMIT_NPROC=1 in container2 it fails since user X already has one running process. We cannot use existing inc_ucounts / dec_ucounts because they do not allow us to exceed the maximum for the counter. Some rlimits can be overlimited by root or if the user has the appropriate capability. Changelog v11: * Change inc_rlimit_ucounts() which now returns top value of ucounts. * Drop inc_rlimit_ucounts_and_test() because the return code of inc_rlimit_ucounts() can be checked. Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/c5286a8aa16d2d698c222f7532f3d735c82bc6bc.1619094428.git.legion@kernel.org Signed-off-by: Eric W. Biederman --- fs/exec.c | 2 +- include/linux/cred.h | 2 ++ include/linux/sched/user.h | 1 - include/linux/user_namespace.h | 12 ++++++++++ kernel/cred.c | 10 ++++---- kernel/exit.c | 2 +- kernel/fork.c | 9 +++---- kernel/sys.c | 2 +- kernel/ucount.c | 44 ++++++++++++++++++++++++++++++++++ kernel/user.c | 1 - kernel/user_namespace.c | 3 ++- 11 files changed, 73 insertions(+), 15 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index d7c4187ca023e..f2bcdbeb3afb7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1878,7 +1878,7 @@ static int do_execveat_common(int fd, struct filename *filename, * whether NPROC limit is still exceeded. */ if ((current->flags & PF_NPROC_EXCEEDED) && - atomic_read(¤t_user()->processes) > rlimit(RLIMIT_NPROC)) { + is_ucounts_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) { retval = -EAGAIN; goto out_ret; } diff --git a/include/linux/cred.h b/include/linux/cred.h index 66436e6550328..5ca1e8a1d0354 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -372,6 +372,7 @@ static inline void put_cred(const struct cred *_cred) #define task_uid(task) (task_cred_xxx((task), uid)) #define task_euid(task) (task_cred_xxx((task), euid)) +#define task_ucounts(task) (task_cred_xxx((task), ucounts)) #define current_cred_xxx(xxx) \ ({ \ @@ -388,6 +389,7 @@ static inline void put_cred(const struct cred *_cred) #define current_fsgid() (current_cred_xxx(fsgid)) #define current_cap() (current_cred_xxx(cap_effective)) #define current_user() (current_cred_xxx(user)) +#define current_ucounts() (current_cred_xxx(ucounts)) extern struct user_namespace init_user_ns; #ifdef CONFIG_USER_NS diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index a8ec3b6093fcb..d33d867ad6c12 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -12,7 +12,6 @@ */ struct user_struct { refcount_t __count; /* reference count */ - atomic_t processes; /* How many processes does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? */ #ifdef CONFIG_FANOTIFY atomic_t fanotify_listeners; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 80b5bf12feaeb..4a97acc359903 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -50,9 +50,12 @@ enum ucount_type { UCOUNT_INOTIFY_INSTANCES, UCOUNT_INOTIFY_WATCHES, #endif + UCOUNT_RLIMIT_NPROC, UCOUNT_COUNTS, }; +#define MAX_PER_NAMESPACE_UCOUNTS UCOUNT_RLIMIT_NPROC + struct user_namespace { struct uid_gid_map uid_map; struct uid_gid_map gid_map; @@ -110,6 +113,15 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid); struct ucounts * __must_check get_ucounts(struct ucounts *ucounts); void put_ucounts(struct ucounts *ucounts); +static inline long get_ucounts_value(struct ucounts *ucounts, enum ucount_type type) +{ + return atomic_long_read(&ucounts->ucount[type]); +} + +long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); +bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); +bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max); + #ifdef CONFIG_USER_NS static inline struct user_namespace *get_user_ns(struct user_namespace *ns) diff --git a/kernel/cred.c b/kernel/cred.c index 58a8a9e24347d..dcfa30b337c5a 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -360,7 +360,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) kdebug("share_creds(%p{%d,%d})", p->cred, atomic_read(&p->cred->usage), read_cred_subscribers(p->cred)); - atomic_inc(&p->cred->user->processes); + inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); return 0; } @@ -395,8 +395,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) } #endif - atomic_inc(&new->user->processes); p->cred = p->real_cred = get_cred(new); + inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); alter_cred_subscribers(new, 2); validate_creds(new); return 0; @@ -496,12 +496,12 @@ int commit_creds(struct cred *new) * in set_user(). */ alter_cred_subscribers(new, 2); - if (new->user != old->user) - atomic_inc(&new->user->processes); + if (new->user != old->user || new->user_ns != old->user_ns) + inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1); rcu_assign_pointer(task->real_cred, new); rcu_assign_pointer(task->cred, new); if (new->user != old->user) - atomic_dec(&old->user->processes); + dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1); alter_cred_subscribers(old, -2); /* send notifications */ diff --git a/kernel/exit.c b/kernel/exit.c index 04029e35e69af..61c0fe902b508 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -188,7 +188,7 @@ repeat: /* don't need to get the RCU readlock here - the process is dead and * can't be modifying its own credentials. But shut RCU-lockdep up */ rcu_read_lock(); - atomic_dec(&__task_cred(p)->user->processes); + dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); rcu_read_unlock(); cgroup_release(p); diff --git a/kernel/fork.c b/kernel/fork.c index 321a5e31d817e..ed7dfb07178d3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -819,9 +819,11 @@ void __init fork_init(void) init_task.signal->rlim[RLIMIT_SIGPENDING] = init_task.signal->rlim[RLIMIT_NPROC]; - for (i = 0; i < UCOUNT_COUNTS; i++) + for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) init_user_ns.ucount_max[i] = max_threads/2; + init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC); + #ifdef CONFIG_VMAP_STACK cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", NULL, free_vm_stack_cache); @@ -1978,8 +1980,7 @@ static __latent_entropy struct task_struct *copy_process( DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif retval = -EAGAIN; - if (atomic_read(&p->real_cred->user->processes) >= - task_rlimit(p, RLIMIT_NPROC)) { + if (is_ucounts_overlimit(task_ucounts(p), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) { if (p->real_cred->user != INIT_USER && !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) goto bad_fork_free; @@ -2382,7 +2383,7 @@ bad_fork_cleanup_threadgroup_lock: #endif delayacct_tsk_free(p); bad_fork_cleanup_count: - atomic_dec(&p->cred->user->processes); + dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); exit_creds(p); bad_fork_free: p->state = TASK_DEAD; diff --git a/kernel/sys.c b/kernel/sys.c index cabfc5b861754..00266a65a0006 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -473,7 +473,7 @@ static int set_user(struct cred *new) * for programs doing set*uid()+execve() by harmlessly deferring the * failure to the execve() stage. */ - if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) && + if (is_ucounts_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) && new_user != INIT_USER) current->flags |= PF_NPROC_EXCEEDED; else diff --git a/kernel/ucount.c b/kernel/ucount.c index 365865f368ecd..6caa56f7dec85 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -80,6 +80,7 @@ static struct ctl_table user_table[] = { UCOUNT_ENTRY("max_inotify_instances"), UCOUNT_ENTRY("max_inotify_watches"), #endif + { }, { } }; #endif /* CONFIG_SYSCTL */ @@ -240,6 +241,48 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type) put_ucounts(ucounts); } +long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v) +{ + struct ucounts *iter; + long ret = 0; + + for (iter = ucounts; iter; iter = iter->ns->ucounts) { + long max = READ_ONCE(iter->ns->ucount_max[type]); + long new = atomic_long_add_return(v, &iter->ucount[type]); + if (new < 0 || new > max) + ret = LONG_MAX; + else if (iter == ucounts) + ret = new; + } + return ret; +} + +bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v) +{ + struct ucounts *iter; + long new; + for (iter = ucounts; iter; iter = iter->ns->ucounts) { + long dec = atomic_long_add_return(-v, &iter->ucount[type]); + WARN_ON_ONCE(dec < 0); + if (iter == ucounts) + new = dec; + } + return (new == 0); +} + +bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max) +{ + struct ucounts *iter; + if (get_ucounts_value(ucounts, type) > max) + return true; + for (iter = ucounts; iter; iter = iter->ns->ucounts) { + max = READ_ONCE(iter->ns->ucount_max[type]); + if (get_ucounts_value(iter, type) > max) + return true; + } + return false; +} + static __init int user_namespace_sysctl_init(void) { #ifdef CONFIG_SYSCTL @@ -256,6 +299,7 @@ static __init int user_namespace_sysctl_init(void) BUG_ON(!setup_userns_sysctls(&init_user_ns)); #endif hlist_add_ucounts(&init_ucounts); + inc_rlimit_ucounts(&init_ucounts, UCOUNT_RLIMIT_NPROC, 1); return 0; } subsys_initcall(user_namespace_sysctl_init); diff --git a/kernel/user.c b/kernel/user.c index a2478cddf536e..7f5ff498207a7 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock); /* root_user.__count is 1, for init task cred */ struct user_struct root_user = { .__count = REFCOUNT_INIT(1), - .processes = ATOMIC_INIT(1), .sigpending = ATOMIC_INIT(0), .locked_shm = 0, .uid = GLOBAL_ROOT_UID, diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index f1b7b4b8ffa25..e6577c8350720 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -119,9 +119,10 @@ int create_user_ns(struct cred *new) ns->owner = owner; ns->group = group; INIT_WORK(&ns->work, free_user_ns); - for (i = 0; i < UCOUNT_COUNTS; i++) { + for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) { ns->ucount_max[i] = INT_MAX; } + ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC); ns->ucounts = ucounts; /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ -- GitLab From 6e52a9f0532f912af37bab4caf18b57d1b9845f4 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Thu, 22 Apr 2021 14:27:12 +0200 Subject: [PATCH 0034/3804] Reimplement RLIMIT_MSGQUEUE on top of ucounts The rlimit counter is tied to uid in the user_namespace. This allows rlimit values to be specified in userns even if they are already globally exceeded by the user. However, the value of the previous user_namespaces cannot be exceeded. Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/2531f42f7884bbfee56a978040b3e0d25cdf6cde.1619094428.git.legion@kernel.org Signed-off-by: Eric W. Biederman --- include/linux/sched/user.h | 4 ---- include/linux/user_namespace.h | 1 + ipc/mqueue.c | 40 ++++++++++++++++++---------------- kernel/fork.c | 1 + kernel/ucount.c | 1 + kernel/user_namespace.c | 1 + 6 files changed, 25 insertions(+), 23 deletions(-) diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index d33d867ad6c12..8a34446681aa6 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -18,10 +18,6 @@ struct user_struct { #endif #ifdef CONFIG_EPOLL atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ -#endif -#ifdef CONFIG_POSIX_MQUEUE - /* protected by mq_lock */ - unsigned long mq_bytes; /* How many bytes can be allocated to mqueue? */ #endif unsigned long locked_shm; /* How many pages of mlocked shm ? */ unsigned long unix_inflight; /* How many files in flight in unix sockets */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 4a97acc359903..5eeb86b00e686 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -51,6 +51,7 @@ enum ucount_type { UCOUNT_INOTIFY_WATCHES, #endif UCOUNT_RLIMIT_NPROC, + UCOUNT_RLIMIT_MSGQUEUE, UCOUNT_COUNTS, }; diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 8031464ed4ae2..461fcf8c873dd 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -144,7 +144,7 @@ struct mqueue_inode_info { struct pid *notify_owner; u32 notify_self_exec_id; struct user_namespace *notify_user_ns; - struct user_struct *user; /* user who created, for accounting */ + struct ucounts *ucounts; /* user who created, for accounting */ struct sock *notify_sock; struct sk_buff *notify_cookie; @@ -292,7 +292,6 @@ static struct inode *mqueue_get_inode(struct super_block *sb, struct ipc_namespace *ipc_ns, umode_t mode, struct mq_attr *attr) { - struct user_struct *u = current_user(); struct inode *inode; int ret = -ENOMEM; @@ -321,7 +320,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, info->notify_owner = NULL; info->notify_user_ns = NULL; info->qsize = 0; - info->user = NULL; /* set when all is ok */ + info->ucounts = NULL; /* set when all is ok */ info->msg_tree = RB_ROOT; info->msg_tree_rightmost = NULL; info->node_cache = NULL; @@ -371,19 +370,23 @@ static struct inode *mqueue_get_inode(struct super_block *sb, if (mq_bytes + mq_treesize < mq_bytes) goto out_inode; mq_bytes += mq_treesize; - spin_lock(&mq_lock); - if (u->mq_bytes + mq_bytes < u->mq_bytes || - u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) { + info->ucounts = get_ucounts(current_ucounts()); + if (info->ucounts) { + long msgqueue; + + spin_lock(&mq_lock); + msgqueue = inc_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes); + if (msgqueue == LONG_MAX || msgqueue > rlimit(RLIMIT_MSGQUEUE)) { + dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes); + spin_unlock(&mq_lock); + put_ucounts(info->ucounts); + info->ucounts = NULL; + /* mqueue_evict_inode() releases info->messages */ + ret = -EMFILE; + goto out_inode; + } spin_unlock(&mq_lock); - /* mqueue_evict_inode() releases info->messages */ - ret = -EMFILE; - goto out_inode; } - u->mq_bytes += mq_bytes; - spin_unlock(&mq_lock); - - /* all is ok */ - info->user = get_uid(u); } else if (S_ISDIR(mode)) { inc_nlink(inode); /* Some things misbehave if size == 0 on a directory */ @@ -497,7 +500,6 @@ static void mqueue_free_inode(struct inode *inode) static void mqueue_evict_inode(struct inode *inode) { struct mqueue_inode_info *info; - struct user_struct *user; struct ipc_namespace *ipc_ns; struct msg_msg *msg, *nmsg; LIST_HEAD(tmp_msg); @@ -520,8 +522,7 @@ static void mqueue_evict_inode(struct inode *inode) free_msg(msg); } - user = info->user; - if (user) { + if (info->ucounts) { unsigned long mq_bytes, mq_treesize; /* Total amount of bytes accounted for the mqueue */ @@ -533,7 +534,7 @@ static void mqueue_evict_inode(struct inode *inode) info->attr.mq_msgsize); spin_lock(&mq_lock); - user->mq_bytes -= mq_bytes; + dec_rlimit_ucounts(info->ucounts, UCOUNT_RLIMIT_MSGQUEUE, mq_bytes); /* * get_ns_from_inode() ensures that the * (ipc_ns = sb->s_fs_info) is either a valid ipc_ns @@ -543,7 +544,8 @@ static void mqueue_evict_inode(struct inode *inode) if (ipc_ns) ipc_ns->mq_queues_count--; spin_unlock(&mq_lock); - free_uid(user); + put_ucounts(info->ucounts); + info->ucounts = NULL; } if (ipc_ns) put_ipc_ns(ipc_ns); diff --git a/kernel/fork.c b/kernel/fork.c index ed7dfb07178d3..a9c5097dfc860 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -823,6 +823,7 @@ void __init fork_init(void) init_user_ns.ucount_max[i] = max_threads/2; init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC); + init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE); #ifdef CONFIG_VMAP_STACK cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", diff --git a/kernel/ucount.c b/kernel/ucount.c index 6caa56f7dec85..6e6f936a5963a 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -80,6 +80,7 @@ static struct ctl_table user_table[] = { UCOUNT_ENTRY("max_inotify_instances"), UCOUNT_ENTRY("max_inotify_watches"), #endif + { }, { }, { } }; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index e6577c8350720..7eccc4f84549f 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -123,6 +123,7 @@ int create_user_ns(struct cred *new) ns->ucount_max[i] = INT_MAX; } ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC); + ns->ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = rlimit(RLIMIT_MSGQUEUE); ns->ucounts = ucounts; /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ -- GitLab From d64696905554e919321e31afc210606653b8f6a4 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Thu, 22 Apr 2021 14:27:13 +0200 Subject: [PATCH 0035/3804] Reimplement RLIMIT_SIGPENDING on top of ucounts The rlimit counter is tied to uid in the user_namespace. This allows rlimit values to be specified in userns even if they are already globally exceeded by the user. However, the value of the previous user_namespaces cannot be exceeded. Changelog v11: * Revert most of changes to fix performance issues. v10: * Fix memory leak on get_ucounts failure. Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/df9d7764dddd50f28616b7840de74ec0f81711a8.1619094428.git.legion@kernel.org Signed-off-by: Eric W. Biederman --- fs/proc/array.c | 2 +- include/linux/sched/user.h | 1 - include/linux/signal_types.h | 4 +++- include/linux/user_namespace.h | 1 + kernel/fork.c | 1 + kernel/signal.c | 25 +++++++++++++------------ kernel/ucount.c | 1 + kernel/user.c | 1 - kernel/user_namespace.c | 1 + 9 files changed, 21 insertions(+), 16 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index bb87e4d89cd8f..74b0ea4b7e385 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -284,7 +284,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) collect_sigign_sigcatch(p, &ignored, &caught); num_threads = get_nr_threads(p); rcu_read_lock(); /* FIXME: is this correct? */ - qsize = atomic_read(&__task_cred(p)->user->sigpending); + qsize = get_ucounts_value(task_ucounts(p), UCOUNT_RLIMIT_SIGPENDING); rcu_read_unlock(); qlim = task_rlimit(p, RLIMIT_SIGPENDING); unlock_task_sighand(p, &flags); diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 8a34446681aa6..8ba9cec4fb99b 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -12,7 +12,6 @@ */ struct user_struct { refcount_t __count; /* reference count */ - atomic_t sigpending; /* How many pending signals does this user have? */ #ifdef CONFIG_FANOTIFY atomic_t fanotify_listeners; #endif diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h index 68e06c75c5b22..34cb28b8f16ca 100644 --- a/include/linux/signal_types.h +++ b/include/linux/signal_types.h @@ -13,6 +13,8 @@ typedef struct kernel_siginfo { __SIGINFO; } kernel_siginfo_t; +struct ucounts; + /* * Real Time signals may be queued. */ @@ -21,7 +23,7 @@ struct sigqueue { struct list_head list; int flags; kernel_siginfo_t info; - struct user_struct *user; + struct ucounts *ucounts; }; /* flags values. */ diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 5eeb86b00e686..58f4179864726 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -52,6 +52,7 @@ enum ucount_type { #endif UCOUNT_RLIMIT_NPROC, UCOUNT_RLIMIT_MSGQUEUE, + UCOUNT_RLIMIT_SIGPENDING, UCOUNT_COUNTS, }; diff --git a/kernel/fork.c b/kernel/fork.c index a9c5097dfc860..03119926b27dd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -824,6 +824,7 @@ void __init fork_init(void) init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC); init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE); + init_user_ns.ucount_max[UCOUNT_RLIMIT_SIGPENDING] = task_rlimit(&init_task, RLIMIT_SIGPENDING); #ifdef CONFIG_VMAP_STACK cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", diff --git a/kernel/signal.c b/kernel/signal.c index f2718350bf4b5..9a6dab712123e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -413,8 +413,8 @@ static struct sigqueue * __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) { struct sigqueue *q = NULL; - struct user_struct *user; - int sigpending; + struct ucounts *ucounts = NULL; + long sigpending; /* * Protect access to @t credentials. This can go away when all @@ -425,27 +425,26 @@ __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimi * changes from/to zero. */ rcu_read_lock(); - user = __task_cred(t)->user; - sigpending = atomic_inc_return(&user->sigpending); + ucounts = task_ucounts(t); + sigpending = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1); if (sigpending == 1) - get_uid(user); + ucounts = get_ucounts(ucounts); rcu_read_unlock(); - if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { + if (override_rlimit || (sigpending < LONG_MAX && sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { q = kmem_cache_alloc(sigqueue_cachep, flags); } else { print_dropped_signal(sig); } if (unlikely(q == NULL)) { - if (atomic_dec_and_test(&user->sigpending)) - free_uid(user); + if (ucounts && dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) + put_ucounts(ucounts); } else { INIT_LIST_HEAD(&q->list); q->flags = 0; - q->user = user; + q->ucounts = ucounts; } - return q; } @@ -453,8 +452,10 @@ static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) return; - if (atomic_dec_and_test(&q->user->sigpending)) - free_uid(q->user); + if (q->ucounts && dec_rlimit_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING, 1)) { + put_ucounts(q->ucounts); + q->ucounts = NULL; + } kmem_cache_free(sigqueue_cachep, q); } diff --git a/kernel/ucount.c b/kernel/ucount.c index 6e6f936a5963a..8ce62da6a62c5 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -80,6 +80,7 @@ static struct ctl_table user_table[] = { UCOUNT_ENTRY("max_inotify_instances"), UCOUNT_ENTRY("max_inotify_watches"), #endif + { }, { }, { }, { } diff --git a/kernel/user.c b/kernel/user.c index 7f5ff498207a7..6737327f83beb 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock); /* root_user.__count is 1, for init task cred */ struct user_struct root_user = { .__count = REFCOUNT_INIT(1), - .sigpending = ATOMIC_INIT(0), .locked_shm = 0, .uid = GLOBAL_ROOT_UID, .ratelimit = RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0), diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 7eccc4f84549f..822eacee45885 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -124,6 +124,7 @@ int create_user_ns(struct cred *new) } ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC); ns->ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = rlimit(RLIMIT_MSGQUEUE); + ns->ucount_max[UCOUNT_RLIMIT_SIGPENDING] = rlimit(RLIMIT_SIGPENDING); ns->ucounts = ucounts; /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ -- GitLab From d7c9e99aee48e6bc0b427f3e3c658a6aba15001e Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Thu, 22 Apr 2021 14:27:14 +0200 Subject: [PATCH 0036/3804] Reimplement RLIMIT_MEMLOCK on top of ucounts The rlimit counter is tied to uid in the user_namespace. This allows rlimit values to be specified in userns even if they are already globally exceeded by the user. However, the value of the previous user_namespaces cannot be exceeded. Changelog v11: * Fix issue found by lkp robot. v8: * Fix issues found by lkp-tests project. v7: * Keep only ucounts for RLIMIT_MEMLOCK checks instead of struct cred. v6: * Fix bug in hugetlb_file_setup() detected by trinity. Reported-by: kernel test robot Reported-by: kernel test robot Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/970d50c70c71bfd4496e0e8d2a0a32feebebb350.1619094428.git.legion@kernel.org Signed-off-by: Eric W. Biederman --- fs/hugetlbfs/inode.c | 16 ++++++++-------- include/linux/hugetlb.h | 4 ++-- include/linux/mm.h | 4 ++-- include/linux/sched/user.h | 1 - include/linux/shmem_fs.h | 2 +- include/linux/user_namespace.h | 1 + ipc/shm.c | 26 +++++++++++++------------- kernel/fork.c | 1 + kernel/ucount.c | 1 + kernel/user.c | 1 - kernel/user_namespace.c | 1 + mm/memfd.c | 4 ++-- mm/mlock.c | 22 ++++++++++++++-------- mm/mmap.c | 4 ++-- mm/shmem.c | 10 +++++----- 15 files changed, 53 insertions(+), 45 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 701c82c361383..be519fc9559a3 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1443,7 +1443,7 @@ static int get_hstate_idx(int page_size_log) * otherwise hugetlb_reserve_pages reserves one less hugepages than intended. */ struct file *hugetlb_file_setup(const char *name, size_t size, - vm_flags_t acctflag, struct user_struct **user, + vm_flags_t acctflag, struct ucounts **ucounts, int creat_flags, int page_size_log) { struct inode *inode; @@ -1455,20 +1455,20 @@ struct file *hugetlb_file_setup(const char *name, size_t size, if (hstate_idx < 0) return ERR_PTR(-ENODEV); - *user = NULL; + *ucounts = NULL; mnt = hugetlbfs_vfsmount[hstate_idx]; if (!mnt) return ERR_PTR(-ENOENT); if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { - *user = current_user(); - if (user_shm_lock(size, *user)) { + *ucounts = current_ucounts(); + if (user_shm_lock(size, *ucounts)) { task_lock(current); pr_warn_once("%s (%d): Using mlock ulimits for SHM_HUGETLB is deprecated\n", current->comm, current->pid); task_unlock(current); } else { - *user = NULL; + *ucounts = NULL; return ERR_PTR(-EPERM); } } @@ -1495,9 +1495,9 @@ struct file *hugetlb_file_setup(const char *name, size_t size, iput(inode); out: - if (*user) { - user_shm_unlock(size, *user); - *user = NULL; + if (*ucounts) { + user_shm_unlock(size, *ucounts); + *ucounts = NULL; } return file; } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index cccd1aab69dd1..96d63dbdec65c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -434,7 +434,7 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) extern const struct file_operations hugetlbfs_file_operations; extern const struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, - struct user_struct **user, int creat_flags, + struct ucounts **ucounts, int creat_flags, int page_size_log); static inline bool is_file_hugepages(struct file *file) @@ -454,7 +454,7 @@ static inline struct hstate *hstate_inode(struct inode *i) #define is_file_hugepages(file) false static inline struct file * hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag, - struct user_struct **user, int creat_flags, + struct ucounts **ucounts, int creat_flags, int page_size_log) { return ERR_PTR(-ENOSYS); diff --git a/include/linux/mm.h b/include/linux/mm.h index 8ba434287387b..3b4e24738ce45 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1670,8 +1670,8 @@ extern bool can_do_mlock(void); #else static inline bool can_do_mlock(void) { return false; } #endif -extern int user_shm_lock(size_t, struct user_struct *); -extern void user_shm_unlock(size_t, struct user_struct *); +extern int user_shm_lock(size_t, struct ucounts *); +extern void user_shm_unlock(size_t, struct ucounts *); /* * Parameter block passed down to zap_pte_range in exceptional cases. diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h index 8ba9cec4fb99b..82bd2532da6bc 100644 --- a/include/linux/sched/user.h +++ b/include/linux/sched/user.h @@ -18,7 +18,6 @@ struct user_struct { #ifdef CONFIG_EPOLL atomic_long_t epoll_watches; /* The number of file descriptors currently watched */ #endif - unsigned long locked_shm; /* How many pages of mlocked shm ? */ unsigned long unix_inflight; /* How many files in flight in unix sockets */ atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */ diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index d82b6f3965885..aa77dcd1646fb 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -65,7 +65,7 @@ extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, extern int shmem_zero_setup(struct vm_area_struct *); extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); -extern int shmem_lock(struct file *file, int lock, struct user_struct *user); +extern int shmem_lock(struct file *file, int lock, struct ucounts *ucounts); #ifdef CONFIG_SHMEM extern const struct address_space_operations shmem_aops; static inline bool shmem_mapping(struct address_space *mapping) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 58f4179864726..2a3177b9b8bfb 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -53,6 +53,7 @@ enum ucount_type { UCOUNT_RLIMIT_NPROC, UCOUNT_RLIMIT_MSGQUEUE, UCOUNT_RLIMIT_SIGPENDING, + UCOUNT_RLIMIT_MEMLOCK, UCOUNT_COUNTS, }; diff --git a/ipc/shm.c b/ipc/shm.c index febd88daba8c6..003234fbbd176 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -60,7 +60,7 @@ struct shmid_kernel /* private to the kernel */ time64_t shm_ctim; struct pid *shm_cprid; struct pid *shm_lprid; - struct user_struct *mlock_user; + struct ucounts *mlock_ucounts; /* The task created the shm object. NULL if the task is dead. */ struct task_struct *shm_creator; @@ -286,10 +286,10 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) shm_rmid(ns, shp); shm_unlock(shp); if (!is_file_hugepages(shm_file)) - shmem_lock(shm_file, 0, shp->mlock_user); - else if (shp->mlock_user) + shmem_lock(shm_file, 0, shp->mlock_ucounts); + else if (shp->mlock_ucounts) user_shm_unlock(i_size_read(file_inode(shm_file)), - shp->mlock_user); + shp->mlock_ucounts); fput(shm_file); ipc_update_pid(&shp->shm_cprid, NULL); ipc_update_pid(&shp->shm_lprid, NULL); @@ -625,7 +625,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) shp->shm_perm.key = key; shp->shm_perm.mode = (shmflg & S_IRWXUGO); - shp->mlock_user = NULL; + shp->mlock_ucounts = NULL; shp->shm_perm.security = NULL; error = security_shm_alloc(&shp->shm_perm); @@ -650,7 +650,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (shmflg & SHM_NORESERVE) acctflag = VM_NORESERVE; file = hugetlb_file_setup(name, hugesize, acctflag, - &shp->mlock_user, HUGETLB_SHMFS_INODE, + &shp->mlock_ucounts, HUGETLB_SHMFS_INODE, (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); } else { /* @@ -698,8 +698,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) no_id: ipc_update_pid(&shp->shm_cprid, NULL); ipc_update_pid(&shp->shm_lprid, NULL); - if (is_file_hugepages(file) && shp->mlock_user) - user_shm_unlock(size, shp->mlock_user); + if (is_file_hugepages(file) && shp->mlock_ucounts) + user_shm_unlock(size, shp->mlock_ucounts); fput(file); ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); return error; @@ -1105,12 +1105,12 @@ static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd) goto out_unlock0; if (cmd == SHM_LOCK) { - struct user_struct *user = current_user(); + struct ucounts *ucounts = current_ucounts(); - err = shmem_lock(shm_file, 1, user); + err = shmem_lock(shm_file, 1, ucounts); if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) { shp->shm_perm.mode |= SHM_LOCKED; - shp->mlock_user = user; + shp->mlock_ucounts = ucounts; } goto out_unlock0; } @@ -1118,9 +1118,9 @@ static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd) /* SHM_UNLOCK */ if (!(shp->shm_perm.mode & SHM_LOCKED)) goto out_unlock0; - shmem_lock(shm_file, 0, shp->mlock_user); + shmem_lock(shm_file, 0, shp->mlock_ucounts); shp->shm_perm.mode &= ~SHM_LOCKED; - shp->mlock_user = NULL; + shp->mlock_ucounts = NULL; get_file(shm_file); ipc_unlock_object(&shp->shm_perm); rcu_read_unlock(); diff --git a/kernel/fork.c b/kernel/fork.c index 03119926b27dd..610fd4de60d76 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -825,6 +825,7 @@ void __init fork_init(void) init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC); init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE); init_user_ns.ucount_max[UCOUNT_RLIMIT_SIGPENDING] = task_rlimit(&init_task, RLIMIT_SIGPENDING); + init_user_ns.ucount_max[UCOUNT_RLIMIT_MEMLOCK] = task_rlimit(&init_task, RLIMIT_MEMLOCK); #ifdef CONFIG_VMAP_STACK cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", diff --git a/kernel/ucount.c b/kernel/ucount.c index 8ce62da6a62c5..d316bac3e5200 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -83,6 +83,7 @@ static struct ctl_table user_table[] = { { }, { }, { }, + { }, { } }; #endif /* CONFIG_SYSCTL */ diff --git a/kernel/user.c b/kernel/user.c index 6737327f83beb..c82399c1618a6 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -98,7 +98,6 @@ static DEFINE_SPINLOCK(uidhash_lock); /* root_user.__count is 1, for init task cred */ struct user_struct root_user = { .__count = REFCOUNT_INIT(1), - .locked_shm = 0, .uid = GLOBAL_ROOT_UID, .ratelimit = RATELIMIT_STATE_INIT(root_user.ratelimit, 0, 0), }; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 822eacee45885..892da1360862a 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -125,6 +125,7 @@ int create_user_ns(struct cred *new) ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC); ns->ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = rlimit(RLIMIT_MSGQUEUE); ns->ucount_max[UCOUNT_RLIMIT_SIGPENDING] = rlimit(RLIMIT_SIGPENDING); + ns->ucount_max[UCOUNT_RLIMIT_MEMLOCK] = rlimit(RLIMIT_MEMLOCK); ns->ucounts = ucounts; /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ diff --git a/mm/memfd.c b/mm/memfd.c index 2647c898990c8..081dd33e6a61b 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -297,9 +297,9 @@ SYSCALL_DEFINE2(memfd_create, } if (flags & MFD_HUGETLB) { - struct user_struct *user = NULL; + struct ucounts *ucounts = NULL; - file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user, + file = hugetlb_file_setup(name, 0, VM_NORESERVE, &ucounts, HUGETLB_ANONHUGE_INODE, (flags >> MFD_HUGE_SHIFT) & MFD_HUGE_MASK); diff --git a/mm/mlock.c b/mm/mlock.c index f8f8cc32d03d0..dd411aabf695b 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -817,9 +817,10 @@ SYSCALL_DEFINE0(munlockall) */ static DEFINE_SPINLOCK(shmlock_user_lock); -int user_shm_lock(size_t size, struct user_struct *user) +int user_shm_lock(size_t size, struct ucounts *ucounts) { unsigned long lock_limit, locked; + long memlock; int allowed = 0; locked = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -828,21 +829,26 @@ int user_shm_lock(size_t size, struct user_struct *user) allowed = 1; lock_limit >>= PAGE_SHIFT; spin_lock(&shmlock_user_lock); - if (!allowed && - locked + user->locked_shm > lock_limit && !capable(CAP_IPC_LOCK)) + memlock = inc_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked); + + if (!allowed && (memlock == LONG_MAX || memlock > lock_limit) && !capable(CAP_IPC_LOCK)) { + dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked); + goto out; + } + if (!get_ucounts(ucounts)) { + dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, locked); goto out; - get_uid(user); - user->locked_shm += locked; + } allowed = 1; out: spin_unlock(&shmlock_user_lock); return allowed; } -void user_shm_unlock(size_t size, struct user_struct *user) +void user_shm_unlock(size_t size, struct ucounts *ucounts) { spin_lock(&shmlock_user_lock); - user->locked_shm -= (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + dec_rlimit_ucounts(ucounts, UCOUNT_RLIMIT_MEMLOCK, (size + PAGE_SIZE - 1) >> PAGE_SHIFT); spin_unlock(&shmlock_user_lock); - free_uid(user); + put_ucounts(ucounts); } diff --git a/mm/mmap.c b/mm/mmap.c index 3f287599a7a30..99f97d200aa4d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1605,7 +1605,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, goto out_fput; } } else if (flags & MAP_HUGETLB) { - struct user_struct *user = NULL; + struct ucounts *ucounts = NULL; struct hstate *hs; hs = hstate_sizelog((flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); @@ -1621,7 +1621,7 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, */ file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE, - &user, HUGETLB_ANONHUGE_INODE, + &ucounts, HUGETLB_ANONHUGE_INODE, (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); if (IS_ERR(file)) return PTR_ERR(file); diff --git a/mm/shmem.c b/mm/shmem.c index b2db4ed0fbc7c..7ee6d27222e9e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2227,7 +2227,7 @@ static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma, } #endif -int shmem_lock(struct file *file, int lock, struct user_struct *user) +int shmem_lock(struct file *file, int lock, struct ucounts *ucounts) { struct inode *inode = file_inode(file); struct shmem_inode_info *info = SHMEM_I(inode); @@ -2239,13 +2239,13 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user) * no serialization needed when called from shm_destroy(). */ if (lock && !(info->flags & VM_LOCKED)) { - if (!user_shm_lock(inode->i_size, user)) + if (!user_shm_lock(inode->i_size, ucounts)) goto out_nomem; info->flags |= VM_LOCKED; mapping_set_unevictable(file->f_mapping); } - if (!lock && (info->flags & VM_LOCKED) && user) { - user_shm_unlock(inode->i_size, user); + if (!lock && (info->flags & VM_LOCKED) && ucounts) { + user_shm_unlock(inode->i_size, ucounts); info->flags &= ~VM_LOCKED; mapping_clear_unevictable(file->f_mapping); } @@ -4093,7 +4093,7 @@ int shmem_unuse(unsigned int type, bool frontswap, return 0; } -int shmem_lock(struct file *file, int lock, struct user_struct *user) +int shmem_lock(struct file *file, int lock, struct ucounts *ucounts) { return 0; } -- GitLab From e4aebf06695c32d49f1007f9d252f97b5b2998a7 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Thu, 22 Apr 2021 14:27:15 +0200 Subject: [PATCH 0037/3804] kselftests: Add test to check for rlimit changes in different user namespaces The testcase runs few instances of the program with RLIMIT_NPROC=1 from user uid=60000, in different user namespaces. Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/28cafdcdd4abd8494b34a27f1970b666b30de8bf.1619094428.git.legion@kernel.org Signed-off-by: Eric W. Biederman --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/rlimits/.gitignore | 2 + tools/testing/selftests/rlimits/Makefile | 6 + tools/testing/selftests/rlimits/config | 1 + .../selftests/rlimits/rlimits-per-userns.c | 161 ++++++++++++++++++ 5 files changed, 171 insertions(+) create mode 100644 tools/testing/selftests/rlimits/.gitignore create mode 100644 tools/testing/selftests/rlimits/Makefile create mode 100644 tools/testing/selftests/rlimits/config create mode 100644 tools/testing/selftests/rlimits/rlimits-per-userns.c diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 6c575cf34a71f..a4ea1481bd9ac 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -48,6 +48,7 @@ TARGETS += proc TARGETS += pstore TARGETS += ptrace TARGETS += openat2 +TARGETS += rlimits TARGETS += rseq TARGETS += rtc TARGETS += seccomp diff --git a/tools/testing/selftests/rlimits/.gitignore b/tools/testing/selftests/rlimits/.gitignore new file mode 100644 index 0000000000000..091021f255b34 --- /dev/null +++ b/tools/testing/selftests/rlimits/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +rlimits-per-userns diff --git a/tools/testing/selftests/rlimits/Makefile b/tools/testing/selftests/rlimits/Makefile new file mode 100644 index 0000000000000..03aadb4062121 --- /dev/null +++ b/tools/testing/selftests/rlimits/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +CFLAGS += -Wall -O2 -g +TEST_GEN_PROGS := rlimits-per-userns + +include ../lib.mk diff --git a/tools/testing/selftests/rlimits/config b/tools/testing/selftests/rlimits/config new file mode 100644 index 0000000000000..416bd53ce9828 --- /dev/null +++ b/tools/testing/selftests/rlimits/config @@ -0,0 +1 @@ +CONFIG_USER_NS=y diff --git a/tools/testing/selftests/rlimits/rlimits-per-userns.c b/tools/testing/selftests/rlimits/rlimits-per-userns.c new file mode 100644 index 0000000000000..26dc949e93eae --- /dev/null +++ b/tools/testing/selftests/rlimits/rlimits-per-userns.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Alexey Gladkov + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NR_CHILDS 2 + +static char *service_prog; +static uid_t user = 60000; +static uid_t group = 60000; + +static void setrlimit_nproc(rlim_t n) +{ + pid_t pid = getpid(); + struct rlimit limit = { + .rlim_cur = n, + .rlim_max = n + }; + + warnx("(pid=%d): Setting RLIMIT_NPROC=%ld", pid, n); + + if (setrlimit(RLIMIT_NPROC, &limit) < 0) + err(EXIT_FAILURE, "(pid=%d): setrlimit(RLIMIT_NPROC)", pid); +} + +static pid_t fork_child(void) +{ + pid_t pid = fork(); + + if (pid < 0) + err(EXIT_FAILURE, "fork"); + + if (pid > 0) + return pid; + + pid = getpid(); + + warnx("(pid=%d): New process starting ...", pid); + + if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0) + err(EXIT_FAILURE, "(pid=%d): prctl(PR_SET_PDEATHSIG)", pid); + + signal(SIGUSR1, SIG_DFL); + + warnx("(pid=%d): Changing to uid=%d, gid=%d", pid, user, group); + + if (setgid(group) < 0) + err(EXIT_FAILURE, "(pid=%d): setgid(%d)", pid, group); + if (setuid(user) < 0) + err(EXIT_FAILURE, "(pid=%d): setuid(%d)", pid, user); + + warnx("(pid=%d): Service running ...", pid); + + warnx("(pid=%d): Unshare user namespace", pid); + if (unshare(CLONE_NEWUSER) < 0) + err(EXIT_FAILURE, "unshare(CLONE_NEWUSER)"); + + char *const argv[] = { "service", NULL }; + char *const envp[] = { "I_AM_SERVICE=1", NULL }; + + warnx("(pid=%d): Executing real service ...", pid); + + execve(service_prog, argv, envp); + err(EXIT_FAILURE, "(pid=%d): execve", pid); +} + +int main(int argc, char **argv) +{ + size_t i; + pid_t child[NR_CHILDS]; + int wstatus[NR_CHILDS]; + int childs = NR_CHILDS; + pid_t pid; + + if (getenv("I_AM_SERVICE")) { + pause(); + exit(EXIT_SUCCESS); + } + + service_prog = argv[0]; + pid = getpid(); + + warnx("(pid=%d) Starting testcase", pid); + + /* + * This rlimit is not a problem for root because it can be exceeded. + */ + setrlimit_nproc(1); + + for (i = 0; i < NR_CHILDS; i++) { + child[i] = fork_child(); + wstatus[i] = 0; + usleep(250000); + } + + while (1) { + for (i = 0; i < NR_CHILDS; i++) { + if (child[i] <= 0) + continue; + + errno = 0; + pid_t ret = waitpid(child[i], &wstatus[i], WNOHANG); + + if (!ret || (!WIFEXITED(wstatus[i]) && !WIFSIGNALED(wstatus[i]))) + continue; + + if (ret < 0 && errno != ECHILD) + warn("(pid=%d): waitpid(%d)", pid, child[i]); + + child[i] *= -1; + childs -= 1; + } + + if (!childs) + break; + + usleep(250000); + + for (i = 0; i < NR_CHILDS; i++) { + if (child[i] <= 0) + continue; + kill(child[i], SIGUSR1); + } + } + + for (i = 0; i < NR_CHILDS; i++) { + if (WIFEXITED(wstatus[i])) + warnx("(pid=%d): pid %d exited, status=%d", + pid, -child[i], WEXITSTATUS(wstatus[i])); + else if (WIFSIGNALED(wstatus[i])) + warnx("(pid=%d): pid %d killed by signal %d", + pid, -child[i], WTERMSIG(wstatus[i])); + + if (WIFSIGNALED(wstatus[i]) && WTERMSIG(wstatus[i]) == SIGUSR1) + continue; + + warnx("(pid=%d): Test failed", pid); + exit(EXIT_FAILURE); + } + + warnx("(pid=%d): Test passed", pid); + exit(EXIT_SUCCESS); +} -- GitLab From c1ada3dc7219b02b3467aa906c2f5f8b098578d1 Mon Sep 17 00:00:00 2001 From: Alexey Gladkov Date: Thu, 22 Apr 2021 14:27:16 +0200 Subject: [PATCH 0038/3804] ucounts: Set ucount_max to the largest positive value the type can hold The ns->ucount_max[] is signed long which is less than the rlimit size. We have to protect ucount_max[] from overflow and only use the largest value that we can hold. On 32bit using "long" instead of "unsigned long" to hold the counts has the downside that RLIMIT_MSGQUEUE and RLIMIT_MEMLOCK are limited to 2GiB instead of 4GiB. I don't think anyone cares but it should be mentioned in case someone does. The RLIMIT_NPROC and RLIMIT_SIGPENDING used atomic_t so their maximum hasn't changed. Signed-off-by: Alexey Gladkov Link: https://lkml.kernel.org/r/1825a5dfa18bc5a570e79feb05e2bd07fd57e7e3.1619094428.git.legion@kernel.org Signed-off-by: Eric W. Biederman --- include/linux/user_namespace.h | 6 ++++++ kernel/fork.c | 8 ++++---- kernel/user_namespace.c | 8 ++++---- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 2a3177b9b8bfb..61794ae32fa8f 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -125,6 +125,12 @@ long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v); bool is_ucounts_overlimit(struct ucounts *ucounts, enum ucount_type type, unsigned long max); +static inline void set_rlimit_ucount_max(struct user_namespace *ns, + enum ucount_type type, unsigned long max) +{ + ns->ucount_max[type] = max <= LONG_MAX ? max : LONG_MAX; +} + #ifdef CONFIG_USER_NS static inline struct user_namespace *get_user_ns(struct user_namespace *ns) diff --git a/kernel/fork.c b/kernel/fork.c index 610fd4de60d76..c41820481b2e1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -822,10 +822,10 @@ void __init fork_init(void) for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) init_user_ns.ucount_max[i] = max_threads/2; - init_user_ns.ucount_max[UCOUNT_RLIMIT_NPROC] = task_rlimit(&init_task, RLIMIT_NPROC); - init_user_ns.ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = task_rlimit(&init_task, RLIMIT_MSGQUEUE); - init_user_ns.ucount_max[UCOUNT_RLIMIT_SIGPENDING] = task_rlimit(&init_task, RLIMIT_SIGPENDING); - init_user_ns.ucount_max[UCOUNT_RLIMIT_MEMLOCK] = task_rlimit(&init_task, RLIMIT_MEMLOCK); + set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_NPROC, task_rlimit(&init_task, RLIMIT_NPROC)); + set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_MSGQUEUE, task_rlimit(&init_task, RLIMIT_MSGQUEUE)); + set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_SIGPENDING, task_rlimit(&init_task, RLIMIT_SIGPENDING)); + set_rlimit_ucount_max(&init_user_ns, UCOUNT_RLIMIT_MEMLOCK, task_rlimit(&init_task, RLIMIT_MEMLOCK)); #ifdef CONFIG_VMAP_STACK cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 892da1360862a..d4a545bbab7f5 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -122,10 +122,10 @@ int create_user_ns(struct cred *new) for (i = 0; i < MAX_PER_NAMESPACE_UCOUNTS; i++) { ns->ucount_max[i] = INT_MAX; } - ns->ucount_max[UCOUNT_RLIMIT_NPROC] = rlimit(RLIMIT_NPROC); - ns->ucount_max[UCOUNT_RLIMIT_MSGQUEUE] = rlimit(RLIMIT_MSGQUEUE); - ns->ucount_max[UCOUNT_RLIMIT_SIGPENDING] = rlimit(RLIMIT_SIGPENDING); - ns->ucount_max[UCOUNT_RLIMIT_MEMLOCK] = rlimit(RLIMIT_MEMLOCK); + set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)); + set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MSGQUEUE, rlimit(RLIMIT_MSGQUEUE)); + set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_SIGPENDING, rlimit(RLIMIT_SIGPENDING)); + set_rlimit_ucount_max(ns, UCOUNT_RLIMIT_MEMLOCK, rlimit(RLIMIT_MEMLOCK)); ns->ucounts = ucounts; /* Inherit USERNS_SETGROUPS_ALLOWED from our parent */ -- GitLab From f928ef685db5d9b82c1c1e24e229c167426c5a1f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 30 Apr 2021 13:00:26 -0500 Subject: [PATCH 0039/3804] ucounts: Silence warning in dec_rlimit_ucounts Dan Carpenter wrote: > > url: https://github.com/0day-ci/linux/commits/legion-kernel-org/Count-rlimits-in-each-user-namespace/20210427-162857 > base: https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git next > config: arc-randconfig-m031-20210426 (attached as .config) > compiler: arceb-elf-gcc (GCC) 9.3.0 > > If you fix the issue, kindly add following tag as appropriate > Reported-by: kernel test robot > Reported-by: Dan Carpenter > > smatch warnings: > kernel/ucount.c:270 dec_rlimit_ucounts() error: uninitialized symbol 'new'. > > vim +/new +270 kernel/ucount.c > > 176ec2b092cc22 Alexey Gladkov 2021-04-22 260 bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v) > 176ec2b092cc22 Alexey Gladkov 2021-04-22 261 { > 176ec2b092cc22 Alexey Gladkov 2021-04-22 262 struct ucounts *iter; > 176ec2b092cc22 Alexey Gladkov 2021-04-22 263 long new; > ^^^^^^^^ > > 176ec2b092cc22 Alexey Gladkov 2021-04-22 264 for (iter = ucounts; iter; iter = iter->ns->ucounts) { > 176ec2b092cc22 Alexey Gladkov 2021-04-22 265 long dec = atomic_long_add_return(-v, &iter->ucount[type]); > 176ec2b092cc22 Alexey Gladkov 2021-04-22 266 WARN_ON_ONCE(dec < 0); > 176ec2b092cc22 Alexey Gladkov 2021-04-22 267 if (iter == ucounts) > 176ec2b092cc22 Alexey Gladkov 2021-04-22 268 new = dec; > 176ec2b092cc22 Alexey Gladkov 2021-04-22 269 } > 176ec2b092cc22 Alexey Gladkov 2021-04-22 @270 return (new == 0); > ^^^^^^^^ > I don't know if this is a bug or not, but I can definitely tell why the > static checker complains about it. > > 176ec2b092cc22 Alexey Gladkov 2021-04-22 271 } In the only two cases that care about the return value of dec_rlimit_ucounts the code first tests to see that ucounts is not NULL. In those cases it is guaranteed at least one iteration of the loop will execute guaranteeing the variable new will be initialized. Initialize new to -1 so that the return value is well defined even when the loop does not execute and the static checker is silenced. Link: https://lkml.kernel.org/r/m1tunny77w.fsf@fess.ebiederm.org Signed-off-by: "Eric W. Biederman" --- kernel/ucount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/ucount.c b/kernel/ucount.c index d316bac3e5200..df84a2a639269 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -263,7 +263,7 @@ long inc_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v) bool dec_rlimit_ucounts(struct ucounts *ucounts, enum ucount_type type, long v) { struct ucounts *iter; - long new; + long new = -1; /* Silence compiler warning */ for (iter = ucounts; iter; iter = iter->ns->ucounts) { long dec = atomic_long_add_return(-v, &iter->ucount[type]); WARN_ON_ONCE(dec < 0); -- GitLab From ff76d506030daeeeb967be8b8a189bf7aee8e7a8 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Thu, 29 Apr 2021 16:12:26 +1200 Subject: [PATCH 0040/3804] KVM: x86/mmu: Avoid unnecessary page table allocation in kvm_tdp_mmu_map() In kvm_tdp_mmu_map(), while iterating TDP MMU page table entries, it is possible SPTE has already been frozen by another thread but the frozen is not done yet, for instance, when another thread is still in middle of zapping large page. In this case, the !is_shadow_present_pte() check for old SPTE in tdp_mmu_for_each_pte() may hit true, and in this case allocating new page table is unnecessary since tdp_mmu_set_spte_atomic() later will return false and page table will need to be freed. Add is_removed_spte() check before allocating new page table to avoid this. Signed-off-by: Kai Huang Message-Id: <20210429041226.50279-1-kai.huang@intel.com> Reviewed-by: Ben Gardon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 88f69a6cc4922..3c8284841bed4 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1009,6 +1009,14 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, } if (!is_shadow_present_pte(iter.old_spte)) { + /* + * If SPTE has been forzen by another thread, just + * give up and retry, avoiding unnecessary page table + * allocation and free. + */ + if (is_removed_spte(iter.old_spte)) + break; + sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level); child_pt = sp->spt; -- GitLab From 1699f65c8b658d434fe92563c906cd1a136c9cb6 Mon Sep 17 00:00:00 2001 From: "Shahin, Md Shahadat Hossain" Date: Fri, 30 Apr 2021 11:52:31 +0000 Subject: [PATCH 0041/3804] kvm/x86: Fix 'lpages' kvm stat for TDM MMU Large pages not being created properly may result in increased memory access time. The 'lpages' kvm stat used to keep track of the current number of large pages in the system, but with TDP MMU enabled the stat is not showing the correct number. This patch extends the lpages counter to cover the TDP case. Signed-off-by: Md Shahadat Hossain Shahin Cc: Bartosz Szczepanek Message-Id: <1619783551459.35424@amazon.de> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 3c8284841bed4..c743894fe0b7c 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -444,6 +444,13 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte); + if (is_large_pte(old_spte) != is_large_pte(new_spte)) { + if (is_large_pte(old_spte)) + atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages); + else + atomic64_add(1, (atomic64_t*)&kvm->stat.lpages); + } + /* * The only times a SPTE should be changed from a non-present to * non-present state is when an MMIO entry is installed/modified/ -- GitLab From d981dd15498b188636ec5a7d8ad485e650f63d8d Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 28 Apr 2021 19:08:02 +0800 Subject: [PATCH 0042/3804] KVM: LAPIC: Accurately guarantee busy wait for timer to expire when using hv_timer Commit ee66e453db13d (KVM: lapic: Busy wait for timer to expire when using hv_timer) tries to set ktime->expired_tscdeadline by checking ktime->hv_timer_in_use since lapic timer oneshot/periodic modes which are emulated by vmx preemption timer also get advanced, they leverage the same vmx preemption timer logic with tsc-deadline mode. However, ktime->hv_timer_in_use is cleared before apic_timer_expired() handling, let's delay this clearing in preemption-disabled region. Fixes: ee66e453db13d ("KVM: lapic: Busy wait for timer to expire when using hv_timer") Reviewed-by: Sean Christopherson Signed-off-by: Wanpeng Li Message-Id: <1619608082-4187-1-git-send-email-wanpengli@tencent.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 152591f9243ab..c0ebef560bd14 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1913,8 +1913,8 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) if (!apic->lapic_timer.hv_timer_in_use) goto out; WARN_ON(rcuwait_active(&vcpu->wait)); - cancel_hv_timer(apic); apic_timer_expired(apic, false); + cancel_hv_timer(apic); if (apic_lvtt_period(apic) && apic->lapic_timer.period) { advance_periodic_target_expiration(apic); -- GitLab From 262de4102c7bb8e59f26a967a8ffe8cce85cc537 Mon Sep 17 00:00:00 2001 From: Benjamin Segall Date: Thu, 29 Apr 2021 16:22:34 +0000 Subject: [PATCH 0043/3804] kvm: exit halt polling on need_resched() as well single_task_running() is usually more general than need_resched() but CFS_BANDWIDTH throttling will use resched_task() when there is just one task to get the task to block. This was causing long-need_resched warnings and was likely allowing VMs to overrun their quota when halt polling. Signed-off-by: Ben Segall Signed-off-by: Venkatesh Srinivas Message-Id: <20210429162233.116849-1-venkateshs@chromium.org> Signed-off-by: Paolo Bonzini Cc: stable@vger.kernel.org Reviewed-by: Jim Mattson --- virt/kvm/kvm_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2799c6660ccea..b9f12da6af0ea 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2973,7 +2973,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) goto out; } poll_end = cur = ktime_get(); - } while (single_task_running() && ktime_before(cur, stop)); + } while (single_task_running() && !need_resched() && + ktime_before(cur, stop)); } prepare_to_rcuwait(&vcpu->wait); -- GitLab From deee59bacb2402c20e6b1b6800f9a5127367eb2a Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 3 May 2021 15:54:42 +0300 Subject: [PATCH 0044/3804] KVM: nSVM: fix a typo in svm_leave_nested When forcibly leaving the nested mode, we should switch to vmcb01 Fixes: 4995a3685f1b ("KVM: SVM: Use a separate vmcb for the nested L2 guest") Signed-off-by: Maxim Levitsky Message-Id: <20210503125446.1353307-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 540d43ba2cf46..3321220f3deb7 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -886,7 +886,7 @@ void svm_leave_nested(struct vcpu_svm *svm) svm->nested.nested_run_pending = 0; leave_guest_mode(vcpu); - svm_switch_vmcb(svm, &svm->nested.vmcb02); + svm_switch_vmcb(svm, &svm->vmcb01); nested_svm_uninit_mmu_context(vcpu); vmcb_mark_all_dirty(svm->vmcb); -- GitLab From c74ad08f3333db2e44d3346b863f6d10d35e37dd Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 3 May 2021 15:54:43 +0300 Subject: [PATCH 0045/3804] KVM: nSVM: fix few bugs in the vmcb02 caching logic * Define and use an invalid GPA (all ones) for init value of last and current nested vmcb physical addresses. * Reset the current vmcb12 gpa to the invalid value when leaving the nested mode, similar to what is done on nested vmexit. * Reset the last seen vmcb12 address when disabling the nested SVM, as it relies on vmcb02 fields which are freed at that point. Fixes: 4995a3685f1b ("KVM: SVM: Use a separate vmcb for the nested L2 guest") Signed-off-by: Maxim Levitsky Message-Id: <20210503125446.1353307-3-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/svm/nested.c | 11 +++++++++++ arch/x86/kvm/svm/svm.c | 4 ++-- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index cbbcee0a84f92..848956bb3cf1d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -113,6 +113,7 @@ #define VALID_PAGE(x) ((x) != INVALID_PAGE) #define UNMAPPED_GVA (~(gpa_t)0) +#define INVALID_GPA (~(gpa_t)0) /* KVM Hugepage definitions for x86 */ #define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 3321220f3deb7..a88c64e004c3d 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -872,6 +872,15 @@ void svm_free_nested(struct vcpu_svm *svm) __free_page(virt_to_page(svm->nested.vmcb02.ptr)); svm->nested.vmcb02.ptr = NULL; + /* + * When last_vmcb12_gpa matches the current vmcb12 gpa, + * some vmcb12 fields are not loaded if they are marked clean + * in the vmcb12, since in this case they are up to date already. + * + * When the vmcb02 is freed, this optimization becomes invalid. + */ + svm->nested.last_vmcb12_gpa = INVALID_GPA; + svm->nested.initialized = false; } @@ -884,6 +893,8 @@ void svm_leave_nested(struct vcpu_svm *svm) if (is_guest_mode(vcpu)) { svm->nested.nested_run_pending = 0; + svm->nested.vmcb12_gpa = INVALID_GPA; + leave_guest_mode(vcpu); svm_switch_vmcb(svm, &svm->vmcb01); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 9790c73f2a325..be5cf612ab1fe 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1235,8 +1235,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm->current_vmcb->asid_generation = 0; svm->asid = 0; - svm->nested.vmcb12_gpa = 0; - svm->nested.last_vmcb12_gpa = 0; + svm->nested.vmcb12_gpa = INVALID_GPA; + svm->nested.last_vmcb12_gpa = INVALID_GPA; vcpu->arch.hflags = 0; if (!kvm_pause_in_guest(vcpu->kvm)) { -- GitLab From 9d290e16432cacd448475d38dec2753b75b9665f Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Mon, 3 May 2021 15:54:44 +0300 Subject: [PATCH 0046/3804] KVM: nSVM: leave the guest mode prior to loading a nested state This allows the KVM to load the nested state more than once without warnings. Signed-off-by: Maxim Levitsky Message-Id: <20210503125446.1353307-4-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index a88c64e004c3d..32400cba608d4 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1309,12 +1309,15 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, * L2 registers if needed are moved from the current VMCB to VMCB02. */ + if (is_guest_mode(vcpu)) + svm_leave_nested(svm); + else + svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save; + svm->nested.nested_run_pending = !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; - if (svm->current_vmcb == &svm->vmcb01) - svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save; svm->vmcb01.ptr->save.es = save->es; svm->vmcb01.ptr->save.cs = save->cs; -- GitLab From 7f6231a39117c2781beead59d6ae4923c2703147 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 3 May 2021 16:24:46 +1200 Subject: [PATCH 0047/3804] KVM: x86/mmu: Fix kdoc of __handle_changed_spte The function name of kdoc of __handle_changed_spte() should be itself, rather than handle_changed_spte(). Fix the typo. Signed-off-by: Kai Huang Message-Id: <20210503042446.154695-1-kai.huang@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index c743894fe0b7c..95eeb5ac6a8a7 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -388,7 +388,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt, } /** - * handle_changed_spte - handle bookkeeping associated with an SPTE change + * __handle_changed_spte - handle bookkeeping associated with an SPTE change * @kvm: kvm instance * @as_id: the address space of the paging structure the SPTE was a part of * @gfn: the base GFN that was mapped by the SPTE -- GitLab From 8899a5fc7da516460f841189a28aac0b52b554fd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 30 Apr 2021 18:03:03 +0100 Subject: [PATCH 0048/3804] KVM: x86: Fix potential fput on a null source_kvm_file The fget can potentially return null, so the fput on the error return path can cause a null pointer dereference. Fix this by checking for a null source_kvm_file before doing a fput. Addresses-Coverity: ("Dereference null return") Fixes: 54526d1fd593 ("KVM: x86: Support KVM VMs sharing SEV context") Signed-off-by: Colin Ian King Message-Id: <20210430170303.131924-1-colin.king@canonical.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 1356ee095cd55..8b11c711a0e40 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -1764,7 +1764,8 @@ e_mirror_unlock: e_source_unlock: mutex_unlock(&source_kvm->lock); e_source_put: - fput(source_kvm_file); + if (source_kvm_file) + fput(source_kvm_file); return ret; } -- GitLab From 5e753a817b2d5991dfe8a801b7b1e8e79a1c5a20 Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Fri, 30 Apr 2021 19:59:51 +0800 Subject: [PATCH 0049/3804] btrfs: fix unmountable seed device after fstrim The following test case reproduces an issue of wrongly freeing in-use blocks on the readonly seed device when fstrim is called on the rw sprout device. As shown below. Create a seed device and add a sprout device to it: $ mkfs.btrfs -fq -dsingle -msingle /dev/loop0 $ btrfstune -S 1 /dev/loop0 $ mount /dev/loop0 /btrfs $ btrfs dev add -f /dev/loop1 /btrfs BTRFS info (device loop0): relocating block group 290455552 flags system BTRFS info (device loop0): relocating block group 1048576 flags system BTRFS info (device loop0): disk added /dev/loop1 $ umount /btrfs Mount the sprout device and run fstrim: $ mount /dev/loop1 /btrfs $ fstrim /btrfs $ umount /btrfs Now try to mount the seed device, and it fails: $ mount /dev/loop0 /btrfs mount: /btrfs: wrong fs type, bad option, bad superblock on /dev/loop0, missing codepage or helper program, or other error. Block 5292032 is missing on the readonly seed device: $ dmesg -kt | tail BTRFS error (device loop0): bad tree block start, want 5292032 have 0 BTRFS warning (device loop0): couldn't read-tree root BTRFS error (device loop0): open_ctree failed From the dump-tree of the seed device (taken before the fstrim). Block 5292032 belonged to the block group starting at 5242880: $ btrfs inspect dump-tree -e /dev/loop0 | grep -A1 BLOCK_GROUP item 3 key (5242880 BLOCK_GROUP_ITEM 8388608) itemoff 16169 itemsize 24 block group used 114688 chunk_objectid 256 flags METADATA From the dump-tree of the sprout device (taken before the fstrim). fstrim used block-group 5242880 to find the related free space to free: $ btrfs inspect dump-tree -e /dev/loop1 | grep -A1 BLOCK_GROUP item 1 key (5242880 BLOCK_GROUP_ITEM 8388608) itemoff 16226 itemsize 24 block group used 32768 chunk_objectid 256 flags METADATA BPF kernel tracing the fstrim command finds the missing block 5292032 within the range of the discarded blocks as below: kprobe:btrfs_discard_extent { printf("freeing start %llu end %llu num_bytes %llu:\n", arg1, arg1+arg2, arg2); } freeing start 5259264 end 5406720 num_bytes 147456 Fix this by avoiding the discard command to the readonly seed device. Reported-by: Chris Murphy CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Anand Jain Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 7a28314189b4a..f1d15b68994a0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1340,12 +1340,16 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, stripe = bbio->stripes; for (i = 0; i < bbio->num_stripes; i++, stripe++) { u64 bytes; + struct btrfs_device *device = stripe->dev; - if (!stripe->dev->bdev) { + if (!device->bdev) { ASSERT(btrfs_test_opt(fs_info, DEGRADED)); continue; } + if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) + continue; + ret = do_discard_extent(stripe, &bytes); if (!ret) { discarded_bytes += bytes; -- GitLab From 784daf2b9628f2d0117f1f0b578cfe5ab6634919 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Fri, 30 Apr 2021 15:34:17 +0200 Subject: [PATCH 0050/3804] btrfs: zoned: sanity check zone type The fstests test case generic/475 creates a dm-linear device that gets changed to a dm-error device. This leads to errors in loading the block group's zone information when running on a zoned file system, ultimately resulting in a list corruption. When running on a kernel with list debugging enabled this leads to the following crash. BTRFS: error (device dm-2) in cleanup_transaction:1953: errno=-5 IO failure kernel BUG at lib/list_debug.c:54! invalid opcode: 0000 [#1] SMP PTI CPU: 1 PID: 2433 Comm: umount Tainted: G W 5.12.0+ #1018 RIP: 0010:__list_del_entry_valid.cold+0x1d/0x47 RSP: 0018:ffffc90001473df0 EFLAGS: 00010296 RAX: 0000000000000054 RBX: ffff8881038fd000 RCX: ffffc90001473c90 RDX: 0000000100001a31 RSI: 0000000000000003 RDI: 0000000000000003 RBP: ffff888308871108 R08: 0000000000000003 R09: 0000000000000001 R10: 3961373532383838 R11: 6666666620736177 R12: ffff888308871000 R13: ffff8881038fd088 R14: ffff8881038fdc78 R15: dead000000000100 FS: 00007f353c9b1540(0000) GS:ffff888627d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f353cc2c710 CR3: 000000018e13c000 CR4: 00000000000006a0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: btrfs_free_block_groups+0xc9/0x310 [btrfs] close_ctree+0x2ee/0x31a [btrfs] ? call_rcu+0x8f/0x270 ? mutex_lock+0x1c/0x40 generic_shutdown_super+0x67/0x100 kill_anon_super+0x14/0x30 btrfs_kill_super+0x12/0x20 [btrfs] deactivate_locked_super+0x31/0x90 cleanup_mnt+0x13e/0x1b0 task_work_run+0x63/0xb0 exit_to_user_mode_loop+0xd9/0xe0 exit_to_user_mode_prepare+0x3e/0x60 syscall_exit_to_user_mode+0x1d/0x50 entry_SYSCALL_64_after_hwframe+0x44/0xae As dm-error has no support for zones, btrfs will run it's zone emulation mode on this device. The zone emulation mode emulates conventional zones, so bail out if the zone bitmap that gets populated on mount sees the zone as sequential while we're thinking it's a conventional zone when creating a block group. Note: this scenario is unlikely in a real wold application and can only happen by this (ab)use of device-mapper targets. CC: stable@vger.kernel.org # 5.12+ Signed-off-by: Naohiro Aota Signed-off-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/zoned.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 70b23a0d03b10..304ce64c70a44 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1126,6 +1126,11 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) goto out; } + if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) { + ret = -EIO; + goto out; + } + switch (zone.cond) { case BLK_ZONE_COND_OFFLINE: case BLK_ZONE_COND_READONLY: -- GitLab From 77364faf21b4105ee5adbb4844fdfb461334d249 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Fri, 30 Apr 2021 11:06:55 -0700 Subject: [PATCH 0051/3804] btrfs: initialize return variable in cleanup_free_space_cache_v1 Static analysis reports this problem free-space-cache.c:3965:2: warning: Undefined or garbage value returned return ret; ^~~~~~~~~~ ret is set in the node handling loop. Treat doing nothing as a success and initialize ret to 0, although it's unlikely the loop would be skipped. We always have block groups, but as it could lead to transaction abort in the caller it's better to be safe. CC: stable@vger.kernel.org # 5.12+ Signed-off-by: Tom Rix Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/free-space-cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index e54466fc101f7..4806295116d88 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -3949,7 +3949,7 @@ static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info, { struct btrfs_block_group *block_group; struct rb_node *node; - int ret; + int ret = 0; btrfs_info(fs_info, "cleaning free space cache v1"); -- GitLab From 0a269a008f837e76ce285679ab3005059fadc2a6 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 14 Apr 2021 14:35:40 +0200 Subject: [PATCH 0052/3804] x86/kvm: Fix pr_info() for async PF setup/teardown 'pr_fmt' already has 'kvm-guest: ' so 'KVM' prefix is redundant. "Unregister pv shared memory" is very ambiguous, it's hard to say which particular PV feature it relates to. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210414123544.1060604-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d307c22e5c188..dc440bb692223 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -345,7 +345,7 @@ static void kvm_guest_cpu_init(void) wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); __this_cpu_write(apf_reason.enabled, 1); - pr_info("KVM setup async PF for cpu %d\n", smp_processor_id()); + pr_info("setup async PF for cpu %d\n", smp_processor_id()); } if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) { @@ -371,7 +371,7 @@ static void kvm_pv_disable_apf(void) wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); __this_cpu_write(apf_reason.enabled, 0); - pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id()); + pr_info("disable async PF for cpu %d\n", smp_processor_id()); } static void kvm_pv_guest_cpu_reboot(void *unused) -- GitLab From 9f015b3765bf593b3ed5d3b588e409dc0ffa9f85 Mon Sep 17 00:00:00 2001 From: Rijo Thomas Date: Wed, 14 Apr 2021 23:08:27 +0530 Subject: [PATCH 0053/3804] tee: amdtee: unload TA only when its refcount becomes 0 Same Trusted Application (TA) can be loaded in multiple TEE contexts. If it is a single instance TA, the TA should not get unloaded from AMD Secure Processor, while it is still in use in another TEE context. Therefore reference count TA and unload it when the count becomes zero. Fixes: 757cc3e9ff1d ("tee: add AMD-TEE driver") Reviewed-by: Devaraj Rangasamy Signed-off-by: Rijo Thomas Acked-by: Dan Carpenter Signed-off-by: Jens Wiklander --- drivers/tee/amdtee/amdtee_private.h | 13 ++++ drivers/tee/amdtee/call.c | 94 ++++++++++++++++++++++++++--- drivers/tee/amdtee/core.c | 15 +++-- 3 files changed, 106 insertions(+), 16 deletions(-) diff --git a/drivers/tee/amdtee/amdtee_private.h b/drivers/tee/amdtee/amdtee_private.h index 337c8d82f74eb..6d0f7062bb870 100644 --- a/drivers/tee/amdtee/amdtee_private.h +++ b/drivers/tee/amdtee/amdtee_private.h @@ -21,6 +21,7 @@ #define TEEC_SUCCESS 0x00000000 #define TEEC_ERROR_GENERIC 0xFFFF0000 #define TEEC_ERROR_BAD_PARAMETERS 0xFFFF0006 +#define TEEC_ERROR_OUT_OF_MEMORY 0xFFFF000C #define TEEC_ERROR_COMMUNICATION 0xFFFF000E #define TEEC_ORIGIN_COMMS 0x00000002 @@ -93,6 +94,18 @@ struct amdtee_shm_data { u32 buf_id; }; +/** + * struct amdtee_ta_data - Keeps track of all TAs loaded in AMD Secure + * Processor + * @ta_handle: Handle to TA loaded in TEE + * @refcount: Reference count for the loaded TA + */ +struct amdtee_ta_data { + struct list_head list_node; + u32 ta_handle; + u32 refcount; +}; + #define LOWER_TWO_BYTE_MASK 0x0000FFFF /** diff --git a/drivers/tee/amdtee/call.c b/drivers/tee/amdtee/call.c index 096dd4d92d39c..07f36ac834c88 100644 --- a/drivers/tee/amdtee/call.c +++ b/drivers/tee/amdtee/call.c @@ -121,15 +121,69 @@ static int amd_params_to_tee_params(struct tee_param *tee, u32 count, return ret; } +static DEFINE_MUTEX(ta_refcount_mutex); +static struct list_head ta_list = LIST_HEAD_INIT(ta_list); + +static u32 get_ta_refcount(u32 ta_handle) +{ + struct amdtee_ta_data *ta_data; + u32 count = 0; + + /* Caller must hold a mutex */ + list_for_each_entry(ta_data, &ta_list, list_node) + if (ta_data->ta_handle == ta_handle) + return ++ta_data->refcount; + + ta_data = kzalloc(sizeof(*ta_data), GFP_KERNEL); + if (ta_data) { + ta_data->ta_handle = ta_handle; + ta_data->refcount = 1; + count = ta_data->refcount; + list_add(&ta_data->list_node, &ta_list); + } + + return count; +} + +static u32 put_ta_refcount(u32 ta_handle) +{ + struct amdtee_ta_data *ta_data; + u32 count = 0; + + /* Caller must hold a mutex */ + list_for_each_entry(ta_data, &ta_list, list_node) + if (ta_data->ta_handle == ta_handle) { + count = --ta_data->refcount; + if (count == 0) { + list_del(&ta_data->list_node); + kfree(ta_data); + break; + } + } + + return count; +} + int handle_unload_ta(u32 ta_handle) { struct tee_cmd_unload_ta cmd = {0}; - u32 status; + u32 status, count; int ret; if (!ta_handle) return -EINVAL; + mutex_lock(&ta_refcount_mutex); + + count = put_ta_refcount(ta_handle); + + if (count) { + pr_debug("unload ta: not unloading %u count %u\n", + ta_handle, count); + ret = -EBUSY; + goto unlock; + } + cmd.ta_handle = ta_handle; ret = psp_tee_process_cmd(TEE_CMD_ID_UNLOAD_TA, (void *)&cmd, @@ -137,8 +191,12 @@ int handle_unload_ta(u32 ta_handle) if (!ret && status != 0) { pr_err("unload ta: status = 0x%x\n", status); ret = -EBUSY; + } else { + pr_debug("unloaded ta handle %u\n", ta_handle); } +unlock: + mutex_unlock(&ta_refcount_mutex); return ret; } @@ -340,7 +398,8 @@ int handle_open_session(struct tee_ioctl_open_session_arg *arg, u32 *info, int handle_load_ta(void *data, u32 size, struct tee_ioctl_open_session_arg *arg) { - struct tee_cmd_load_ta cmd = {0}; + struct tee_cmd_unload_ta unload_cmd = {}; + struct tee_cmd_load_ta load_cmd = {}; phys_addr_t blob; int ret; @@ -353,21 +412,36 @@ int handle_load_ta(void *data, u32 size, struct tee_ioctl_open_session_arg *arg) return -EINVAL; } - cmd.hi_addr = upper_32_bits(blob); - cmd.low_addr = lower_32_bits(blob); - cmd.size = size; + load_cmd.hi_addr = upper_32_bits(blob); + load_cmd.low_addr = lower_32_bits(blob); + load_cmd.size = size; - ret = psp_tee_process_cmd(TEE_CMD_ID_LOAD_TA, (void *)&cmd, - sizeof(cmd), &arg->ret); + mutex_lock(&ta_refcount_mutex); + + ret = psp_tee_process_cmd(TEE_CMD_ID_LOAD_TA, (void *)&load_cmd, + sizeof(load_cmd), &arg->ret); if (ret) { arg->ret_origin = TEEC_ORIGIN_COMMS; arg->ret = TEEC_ERROR_COMMUNICATION; - } else { - set_session_id(cmd.ta_handle, 0, &arg->session); + } else if (arg->ret == TEEC_SUCCESS) { + ret = get_ta_refcount(load_cmd.ta_handle); + if (!ret) { + arg->ret_origin = TEEC_ORIGIN_COMMS; + arg->ret = TEEC_ERROR_OUT_OF_MEMORY; + + /* Unload the TA on error */ + unload_cmd.ta_handle = load_cmd.ta_handle; + psp_tee_process_cmd(TEE_CMD_ID_UNLOAD_TA, + (void *)&unload_cmd, + sizeof(unload_cmd), &ret); + } else { + set_session_id(load_cmd.ta_handle, 0, &arg->session); + } } + mutex_unlock(&ta_refcount_mutex); pr_debug("load TA: TA handle = 0x%x, RO = 0x%x, ret = 0x%x\n", - cmd.ta_handle, arg->ret_origin, arg->ret); + load_cmd.ta_handle, arg->ret_origin, arg->ret); return 0; } diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c index 8a6a8f30bb427..da6b88e80dc07 100644 --- a/drivers/tee/amdtee/core.c +++ b/drivers/tee/amdtee/core.c @@ -59,10 +59,9 @@ static void release_session(struct amdtee_session *sess) continue; handle_close_session(sess->ta_handle, sess->session_info[i]); + handle_unload_ta(sess->ta_handle); } - /* Unload Trusted Application once all sessions are closed */ - handle_unload_ta(sess->ta_handle); kfree(sess); } @@ -224,8 +223,6 @@ static void destroy_session(struct kref *ref) struct amdtee_session *sess = container_of(ref, struct amdtee_session, refcount); - /* Unload the TA from TEE */ - handle_unload_ta(sess->ta_handle); mutex_lock(&session_list_mutex); list_del(&sess->list_node); mutex_unlock(&session_list_mutex); @@ -238,7 +235,7 @@ int amdtee_open_session(struct tee_context *ctx, { struct amdtee_context_data *ctxdata = ctx->data; struct amdtee_session *sess = NULL; - u32 session_info; + u32 session_info, ta_handle; size_t ta_size; int rc, i; void *ta; @@ -259,11 +256,14 @@ int amdtee_open_session(struct tee_context *ctx, if (arg->ret != TEEC_SUCCESS) goto out; + ta_handle = get_ta_handle(arg->session); + mutex_lock(&session_list_mutex); sess = alloc_session(ctxdata, arg->session); mutex_unlock(&session_list_mutex); if (!sess) { + handle_unload_ta(ta_handle); rc = -ENOMEM; goto out; } @@ -277,6 +277,7 @@ int amdtee_open_session(struct tee_context *ctx, if (i >= TEE_NUM_SESSIONS) { pr_err("reached maximum session count %d\n", TEE_NUM_SESSIONS); + handle_unload_ta(ta_handle); kref_put(&sess->refcount, destroy_session); rc = -ENOMEM; goto out; @@ -289,12 +290,13 @@ int amdtee_open_session(struct tee_context *ctx, spin_lock(&sess->lock); clear_bit(i, sess->sess_mask); spin_unlock(&sess->lock); + handle_unload_ta(ta_handle); kref_put(&sess->refcount, destroy_session); goto out; } sess->session_info[i] = session_info; - set_session_id(sess->ta_handle, i, &arg->session); + set_session_id(ta_handle, i, &arg->session); out: free_pages((u64)ta, get_order(ta_size)); return rc; @@ -329,6 +331,7 @@ int amdtee_close_session(struct tee_context *ctx, u32 session) /* Close the session */ handle_close_session(ta_handle, session_info); + handle_unload_ta(ta_handle); kref_put(&sess->refcount, destroy_session); -- GitLab From 6a01268687c8d00e59dff341c519a337de980d2e Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Sun, 7 Feb 2021 13:47:04 -0500 Subject: [PATCH 0054/3804] HID: semitek: new driver for GK6X series keyboards A number of USB keyboards, using the Semitek firmware, are capable of handling arbitrary N-key rollover, but due to a buggy report descriptor, keys beyond the sixth cannot be detected by the generic HID driver. There are numerous hardware variants sold by several vendors, mostly using generic names like "GK61" for the 61-key version. These keyboards are sometimes known collectively as the "GK6X" series. The keyboard has three USB interfaces. Interface 0 uses the standard HID boot protocol, limited to eight modifier keys and six normal keys; interface 2 uses a custom report format that permits any number of keys. If more than six keys are pressed simultaneously, the first six are reported via interface 0 while subsequent keys are reported via interface 2. (Interface 1 uses a custom protocol for reprogramming the keyboard; this can be controlled through userspace tools and is not of concern for the present driver.) The report descriptor for interface 2, however, is incorrect (for report ID 0x04, the input field is marked as "array" rather than "variable".) The descriptor appears to be correct in other respects, so we simply replace the incorrect byte before parsing the descriptor. Signed-off-by: Benjamin Moody Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 15 +++++++++++++++ drivers/hid/Makefile | 1 + drivers/hid/hid-ids.h | 3 +++ drivers/hid/hid-semitek.c | 40 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+) create mode 100644 drivers/hid/hid-semitek.c diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 4bf263c2d61a4..5756203af068d 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -922,6 +922,21 @@ config HID_SAMSUNG help Support for Samsung InfraRed remote control or keyboards. +config HID_SEMITEK + tristate "Semitek USB keyboards" + depends on HID + help + Support for Semitek USB keyboards that are not fully compliant + with the HID standard. + + There are many variants, including: + - GK61, GK64, GK68, GK84, GK96, etc. + - SK61, SK64, SK68, SK84, SK96, etc. + - Dierya DK61/DK66 + - Tronsmart TK09R + - Woo-dy + - X-Bows Nature/Knight + config HID_SONY tristate "Sony PS2/3/4 accessories" depends on USB_HID diff --git a/drivers/hid/Makefile b/drivers/hid/Makefile index 193431ec4db84..1ea1a7c0b20fe 100644 --- a/drivers/hid/Makefile +++ b/drivers/hid/Makefile @@ -106,6 +106,7 @@ obj-$(CONFIG_HID_ROCCAT) += hid-roccat.o hid-roccat-common.o \ obj-$(CONFIG_HID_RMI) += hid-rmi.o obj-$(CONFIG_HID_SAITEK) += hid-saitek.o obj-$(CONFIG_HID_SAMSUNG) += hid-samsung.o +obj-$(CONFIG_HID_SEMITEK) += hid-semitek.o obj-$(CONFIG_HID_SMARTJOYPLUS) += hid-sjoy.o obj-$(CONFIG_HID_SONY) += hid-sony.o obj-$(CONFIG_HID_SPEEDLINK) += hid-speedlink.o diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 84b8da3e7d09a..e0d8dab18a7db 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1060,6 +1060,9 @@ #define USB_DEVICE_ID_SEMICO_USB_KEYKOARD 0x0023 #define USB_DEVICE_ID_SEMICO_USB_KEYKOARD2 0x0027 +#define USB_VENDOR_ID_SEMITEK 0x1ea7 +#define USB_DEVICE_ID_SEMITEK_KEYBOARD 0x0907 + #define USB_VENDOR_ID_SENNHEISER 0x1395 #define USB_DEVICE_ID_SENNHEISER_BTD500USB 0x002c diff --git a/drivers/hid/hid-semitek.c b/drivers/hid/hid-semitek.c new file mode 100644 index 0000000000000..ba6607d5e0510 --- /dev/null +++ b/drivers/hid/hid-semitek.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * HID driver for Semitek keyboards + * + * Copyright (c) 2021 Benjamin Moody + */ + +#include +#include +#include + +#include "hid-ids.h" + +static __u8 *semitek_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *rsize) +{ + /* In the report descriptor for interface 2, fix the incorrect + description of report ID 0x04 (the report contains a + bitmask, not an array of keycodes.) */ + if (*rsize == 0xcb && rdesc[0x83] == 0x81 && rdesc[0x84] == 0x00) { + hid_info(hdev, "fixing up Semitek report descriptor\n"); + rdesc[0x84] = 0x02; + } + return rdesc; +} + +static const struct hid_device_id semitek_devices[] = { + { HID_USB_DEVICE(USB_VENDOR_ID_SEMITEK, USB_DEVICE_ID_SEMITEK_KEYBOARD) }, + { } +}; +MODULE_DEVICE_TABLE(hid, semitek_devices); + +static struct hid_driver semitek_driver = { + .name = "semitek", + .id_table = semitek_devices, + .report_fixup = semitek_report_fixup, +}; +module_hid_driver(semitek_driver); + +MODULE_LICENSE("GPL"); -- GitLab From 4bfb2c72b2bfca8684c2f5c25a3119bad016a9d3 Mon Sep 17 00:00:00 2001 From: Luke D Jones Date: Fri, 19 Feb 2021 10:38:46 +1300 Subject: [PATCH 0055/3804] HID: asus: Filter keyboard EC for old ROG keyboard Older ROG keyboards emit a similar stream of bytes to the new N-Key keyboards and require filtering to prevent a lot of unmapped key warnings showing. As all the ROG keyboards use QUIRK_USE_KBD_BACKLIGHT this is now used to branch to filtering in asus_raw_event. Signed-off-by: Luke D Jones Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 2ab22b9259418..1ed1c05c3d542 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -335,7 +335,7 @@ static int asus_raw_event(struct hid_device *hdev, if (drvdata->quirks & QUIRK_MEDION_E1239T) return asus_e1239t_event(drvdata, data, size); - if (drvdata->quirks & QUIRK_ROG_NKEY_KEYBOARD) { + if (drvdata->quirks & QUIRK_USE_KBD_BACKLIGHT) { /* * Skip these report ID, the device emits a continuous stream associated * with the AURA mode it is in which looks like an 'echo'. -- GitLab From 25bdbfbb2d8331a67824dd03d0087e9c98835f3a Mon Sep 17 00:00:00 2001 From: Nirenjan Krishnan Date: Mon, 29 Mar 2021 09:10:02 -0700 Subject: [PATCH 0056/3804] HID: quirks: Set INCREMENT_USAGE_ON_DUPLICATE for Saitek X65 The Saitek X65 joystick has a pair of axes that were used as mouse pointer controls by the Windows driver. The corresponding usage page is the Game Controls page, which is not recognized by the generic HID driver, and therefore, both axes get mapped to ABS_MISC. The quirk makes the second axis get mapped to ABS_MISC+1, and therefore made available separately. Signed-off-by: Nirenjan Krishnan Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index e0d8dab18a7db..7601ec19ab289 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1051,6 +1051,7 @@ #define USB_DEVICE_ID_SAITEK_X52 0x075c #define USB_DEVICE_ID_SAITEK_X52_2 0x0255 #define USB_DEVICE_ID_SAITEK_X52_PRO 0x0762 +#define USB_DEVICE_ID_SAITEK_X65 0x0b6a #define USB_VENDOR_ID_SAMSUNG 0x0419 #define USB_DEVICE_ID_SAMSUNG_IR_REMOTE 0x0001 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 3dd6f15f2a67f..152c6aab11b51 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -158,6 +158,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_X52), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_X52_2), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_X52_PRO), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, + { HID_USB_DEVICE(USB_VENDOR_ID_SAITEK, USB_DEVICE_ID_SAITEK_X65), HID_QUIRK_INCREMENT_USAGE_ON_DUPLICATE }, { HID_USB_DEVICE(USB_VENDOR_ID_SEMICO, USB_DEVICE_ID_SEMICO_USB_KEYKOARD2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SEMICO, USB_DEVICE_ID_SEMICO_USB_KEYKOARD), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SENNHEISER, USB_DEVICE_ID_SENNHEISER_BTD500USB), HID_QUIRK_NOGET }, -- GitLab From ed1ab6ff213a701d4a635883c63e0d6fcbbab27d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Fri, 2 Apr 2021 09:40:41 +0000 Subject: [PATCH 0057/3804] HID: thrustmaster: fix return value check in thrustmaster_probe() Fix the return value check which testing the wrong variable in thrustmaster_probe(). Fixes: c49c33637802 ("HID: support for initialization of some Thrustmaster wheels") Reported-by: Hulk Robot Signed-off-by: Wei Yongjun Signed-off-by: Jiri Kosina --- drivers/hid/hid-thrustmaster.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hid/hid-thrustmaster.c b/drivers/hid/hid-thrustmaster.c index 2e452c6e8ef40..f643b1cb112d5 100644 --- a/drivers/hid/hid-thrustmaster.c +++ b/drivers/hid/hid-thrustmaster.c @@ -312,7 +312,7 @@ static int thrustmaster_probe(struct hid_device *hdev, const struct hid_device_i } tm_wheel->change_request = kzalloc(sizeof(struct usb_ctrlrequest), GFP_KERNEL); - if (!tm_wheel->model_request) { + if (!tm_wheel->change_request) { ret = -ENOMEM; goto error5; } -- GitLab From ed80bdc4571fae177c44eba0997a0d551fc21e15 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sun, 4 Apr 2021 17:40:54 +0200 Subject: [PATCH 0058/3804] HID: quirks: Add HID_QUIRK_NO_INIT_REPORTS quirk for Dell K15A keyboard-dock Just like the K12A the Dell K15A keyboard-dock has problems with get_feature requests. This sometimes leads to several "failed to fetch feature 8" messages getting logged, after which the touchpad may or may not work. Just like the K15A these errors are triggered by undocking and docking the tablet. There also seem to be other problems when undocking and then docking again in quick succession. It seems that in this case the keyboard-controller still retains some power from capacitors and does not go through a power-on-reset leaving it in a confuses state, symptoms of this are: 1. The USB-ids changing to 048d:8910 2. Failure to read the HID descriptors on the second (mouse) USB intf. 3. The touchpad freezing after a while These problems can all be cleared by undocking the keyboard and waiting a full minute before redocking it. Unfortunately there is nothing we can do about this in the kernel. Signed-off-by: Hans de Goede Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 7601ec19ab289..c6a6c8f547954 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -1165,6 +1165,7 @@ #define USB_DEVICE_ID_SYNAPTICS_DELL_K12A 0x2819 #define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012 0x2968 #define USB_DEVICE_ID_SYNAPTICS_TP_V103 0x5710 +#define USB_DEVICE_ID_SYNAPTICS_DELL_K15A 0x6e21 #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1002 0x73f4 #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003 0x73f5 #define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5 0x81a7 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 152c6aab11b51..fc6173a91af58 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -177,6 +177,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_QUAD_HD), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_TP_V103), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_DELL_K12A), HID_QUIRK_NO_INIT_REPORTS }, + { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_DELL_K15A), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_TOPMAX, USB_DEVICE_ID_TOPMAX_COBRAPAD), HID_QUIRK_BADPAD }, { HID_USB_DEVICE(USB_VENDOR_ID_TOUCHPACK, USB_DEVICE_ID_TOUCHPACK_RTS), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_TPV, USB_DEVICE_ID_TPV_OPTICAL_TOUCHSCREEN_8882), HID_QUIRK_NOGET }, -- GitLab From 9858c74c29e12be5886280725e781cb735b2aca6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Jo=C5=84czyk?= Date: Tue, 6 Apr 2021 20:25:38 +0200 Subject: [PATCH 0059/3804] HID: a4tech: use A4_2WHEEL_MOUSE_HACK_B8 for A4TECH NB-95 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This mouse has a horizontal wheel that requires special handling. Without this patch, the horizontal wheel acts like a vertical wheel. In the output of `hidrd-convert` for this mouse, there is a `Usage (B8h)` field. It corresponds to a byte in packets sent by the device that specifies which wheel generated an input event. The name "A4TECH" is spelled in all capitals on the company website. Signed-off-by: Mateusz Jończyk Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 4 ++-- drivers/hid/hid-a4tech.c | 2 ++ drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 5756203af068d..160554903ef96 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -93,11 +93,11 @@ menu "Special HID drivers" depends on HID config HID_A4TECH - tristate "A4 tech mice" + tristate "A4TECH mice" depends on HID default !EXPERT help - Support for A4 tech X5 and WOP-35 / Trust 450L mice. + Support for some A4TECH mice with two scroll wheels. config HID_ACCUTOUCH tristate "Accutouch touch device" diff --git a/drivers/hid/hid-a4tech.c b/drivers/hid/hid-a4tech.c index 3a8c4a5971f70..2cbc32dda7f74 100644 --- a/drivers/hid/hid-a4tech.c +++ b/drivers/hid/hid-a4tech.c @@ -147,6 +147,8 @@ static const struct hid_device_id a4_devices[] = { .driver_data = A4_2WHEEL_MOUSE_HACK_B8 }, { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_RP_649), .driver_data = A4_2WHEEL_MOUSE_HACK_B8 }, + { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_NB_95), + .driver_data = A4_2WHEEL_MOUSE_HACK_B8 }, { } }; MODULE_DEVICE_TABLE(hid, a4_devices); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index c6a6c8f547954..933b0ed9d3ed7 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -26,6 +26,7 @@ #define USB_DEVICE_ID_A4TECH_WCP32PU 0x0006 #define USB_DEVICE_ID_A4TECH_X5_005D 0x000a #define USB_DEVICE_ID_A4TECH_RP_649 0x001a +#define USB_DEVICE_ID_A4TECH_NB_95 0x022b #define USB_VENDOR_ID_AASHIMA 0x06d6 #define USB_DEVICE_ID_AASHIMA_GAMEPAD 0x0025 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index fc6173a91af58..d2933f2ffec58 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -213,6 +213,7 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_WCP32PU) }, { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_X5_005D) }, { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_RP_649) }, + { HID_USB_DEVICE(USB_VENDOR_ID_A4TECH, USB_DEVICE_ID_A4TECH_NB_95) }, #endif #if IS_ENABLED(CONFIG_HID_ACCUTOUCH) { HID_USB_DEVICE(USB_VENDOR_ID_ELO, USB_DEVICE_ID_ELO_ACCUTOUCH_2216) }, -- GitLab From 7b229b13d78d112e2c5d4a60a3c6f602289959fa Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 10 Apr 2021 19:56:05 -0700 Subject: [PATCH 0060/3804] HID: hid-input: add mapping for emoji picker key HUTRR101 added a new usage code for a key that is supposed to invoke and dismiss an emoji picker widget to assist users to locate and enter emojis. This patch adds a new key definition KEY_EMOJI_PICKER and maps 0x0c/0x0d9 usage code to this new keycode. Additionally hid-debug is adjusted to recognize this new usage code as well. Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- drivers/hid/hid-debug.c | 1 + drivers/hid/hid-input.c | 3 +++ include/uapi/linux/input-event-codes.h | 1 + 3 files changed, 5 insertions(+) diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 59f8d716d78f5..75a59e41724de 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -930,6 +930,7 @@ static const char *keys[KEY_MAX + 1] = { [KEY_APPSELECT] = "AppSelect", [KEY_SCREENSAVER] = "ScreenSaver", [KEY_VOICECOMMAND] = "VoiceCommand", + [KEY_EMOJI_PICKER] = "EmojiPicker", [KEY_BRIGHTNESS_MIN] = "BrightnessMin", [KEY_BRIGHTNESS_MAX] = "BrightnessMax", [KEY_BRIGHTNESS_AUTO] = "BrightnessAuto", diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 18f5e28d475cd..abbfa91e73e43 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -964,6 +964,9 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x0cd: map_key_clear(KEY_PLAYPAUSE); break; case 0x0cf: map_key_clear(KEY_VOICECOMMAND); break; + + case 0x0d9: map_key_clear(KEY_EMOJI_PICKER); break; + case 0x0e0: map_abs_clear(ABS_VOLUME); break; case 0x0e2: map_key_clear(KEY_MUTE); break; case 0x0e5: map_key_clear(KEY_BASSBOOST); break; diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index ee93428ced9a1..225ec87d4f228 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -611,6 +611,7 @@ #define KEY_VOICECOMMAND 0x246 /* Listening Voice Command */ #define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */ #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ +#define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ -- GitLab From 0f0fb3d27e5ba51e40f2af4288efeaf3d293ef1a Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 10 Apr 2021 19:56:06 -0700 Subject: [PATCH 0061/3804] HID: hid-debug: recognize KEY_ASSISTANT and KEY_KBD_LAYOUT_NEXT Add missing descriptions for KEY_ASSISTANT and KEY_KBD_LAYOUT_NEXT. Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- drivers/hid/hid-debug.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 75a59e41724de..a311fb87b02a1 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -930,6 +930,8 @@ static const char *keys[KEY_MAX + 1] = { [KEY_APPSELECT] = "AppSelect", [KEY_SCREENSAVER] = "ScreenSaver", [KEY_VOICECOMMAND] = "VoiceCommand", + [KEY_ASSISTANT] = "Assistant", + [KEY_KBD_LAYOUT_NEXT] = "KbdLayoutNext", [KEY_EMOJI_PICKER] = "EmojiPicker", [KEY_BRIGHTNESS_MIN] = "BrightnessMin", [KEY_BRIGHTNESS_MAX] = "BrightnessMax", -- GitLab From b0d713c60c75cdd04bf8ad8cfb046c8530709de3 Mon Sep 17 00:00:00 2001 From: Maximilian Luz Date: Sun, 11 Apr 2021 13:34:02 +0200 Subject: [PATCH 0062/3804] HID: surface-hid: Fix integer endian conversion We want to convert from 16 bit (unsigned) little endian values contained in a packed struct to CPU native endian values here, not the other way around. So replace cpu_to_le16() with get_unaligned_le16(), using the latter instead of le16_to_cpu() to acknowledge that we are reading from a packed struct. Reported-by: kernel test robot Fixes: b05ff1002a5c ("HID: Add support for Surface Aggregator Module HID transport") Signed-off-by: Maximilian Luz Signed-off-by: Jiri Kosina --- drivers/hid/surface-hid/surface_hid_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/hid/surface-hid/surface_hid_core.c b/drivers/hid/surface-hid/surface_hid_core.c index 7b27ec3922322..5571e74abe91b 100644 --- a/drivers/hid/surface-hid/surface_hid_core.c +++ b/drivers/hid/surface-hid/surface_hid_core.c @@ -168,9 +168,9 @@ int surface_hid_device_add(struct surface_hid_device *shid) shid->hid->dev.parent = shid->dev; shid->hid->bus = BUS_HOST; - shid->hid->vendor = cpu_to_le16(shid->attrs.vendor); - shid->hid->product = cpu_to_le16(shid->attrs.product); - shid->hid->version = cpu_to_le16(shid->hid_desc.hid_version); + shid->hid->vendor = get_unaligned_le16(&shid->attrs.vendor); + shid->hid->product = get_unaligned_le16(&shid->attrs.product); + shid->hid->version = get_unaligned_le16(&shid->hid_desc.hid_version); shid->hid->country = shid->hid_desc.country_code; snprintf(shid->hid->name, sizeof(shid->hid->name), "Microsoft Surface %04X:%04X", -- GitLab From b45ef5db7bf268f6851bb5395d60301338374abc Mon Sep 17 00:00:00 2001 From: Michael Zaidman Date: Tue, 13 Apr 2021 18:12:00 +0300 Subject: [PATCH 0063/3804] HID: ft260: check data size in ft260_smbus_write() The SMbus block transaction limits the number of bytes transferred to 32, but nothing prevents a user from specifying via ioctl a larger data size than the ft260 can handle in a single transfer. i2cdev_ioctl_smbus() --> i2c_smbus_xfer --> __i2c_smbus_xfer --> ft260_smbus_xfer --> ft260_smbus_write This patch adds data size checking in the ft260_smbus_write(). Fixes: 98189a0adfa0 ("HID: ft260: add usb hid to i2c host bridge driver") Signed-off-by: Michael Zaidman Reported-by: Dan Carpenter Signed-off-by: Jiri Kosina --- drivers/hid/hid-ft260.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-ft260.c b/drivers/hid/hid-ft260.c index a5751607ce24a..7a9ba984a75ac 100644 --- a/drivers/hid/hid-ft260.c +++ b/drivers/hid/hid-ft260.c @@ -201,7 +201,7 @@ struct ft260_i2c_write_request_report { u8 address; /* 7-bit I2C address */ u8 flag; /* I2C transaction condition */ u8 length; /* data payload length */ - u8 data[60]; /* data payload */ + u8 data[FT260_WR_DATA_MAX]; /* data payload */ } __packed; struct ft260_i2c_read_request_report { @@ -429,6 +429,9 @@ static int ft260_smbus_write(struct ft260_device *dev, u8 addr, u8 cmd, struct ft260_i2c_write_request_report *rep = (struct ft260_i2c_write_request_report *)dev->write_buf; + if (data_len >= sizeof(rep->data)) + return -EINVAL; + rep->address = addr; rep->data[0] = cmd; rep->length = data_len + 1; -- GitLab From edb032033da0dc850f6e7740fa1023c73195bc89 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 15 Apr 2021 11:52:31 -0700 Subject: [PATCH 0064/3804] HID: hid-sensor-hub: Return error for hid_set_field() failure In the function sensor_hub_set_feature(), return error when hid_set_field() fails. Signed-off-by: Srinivas Pandruvada Acked-by: Jonathan Cameron Signed-off-by: Jiri Kosina --- drivers/hid/hid-sensor-hub.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-sensor-hub.c b/drivers/hid/hid-sensor-hub.c index 95cf88f3bafb9..6abd3e2a9094c 100644 --- a/drivers/hid/hid-sensor-hub.c +++ b/drivers/hid/hid-sensor-hub.c @@ -209,16 +209,21 @@ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id, buffer_size = buffer_size / sizeof(__s32); if (buffer_size) { for (i = 0; i < buffer_size; ++i) { - hid_set_field(report->field[field_index], i, - (__force __s32)cpu_to_le32(*buf32)); + ret = hid_set_field(report->field[field_index], i, + (__force __s32)cpu_to_le32(*buf32)); + if (ret) + goto done_proc; + ++buf32; } } if (remaining_bytes) { value = 0; memcpy(&value, (u8 *)buf32, remaining_bytes); - hid_set_field(report->field[field_index], i, - (__force __s32)cpu_to_le32(value)); + ret = hid_set_field(report->field[field_index], i, + (__force __s32)cpu_to_le32(value)); + if (ret) + goto done_proc; } hid_hw_request(hsdev->hdev, report, HID_REQ_SET_REPORT); hid_hw_wait(hsdev->hdev); -- GitLab From c980512b4512adf2c6f9edb948ce19423b23124d Mon Sep 17 00:00:00 2001 From: Luke D Jones Date: Sun, 18 Apr 2021 21:12:29 +1200 Subject: [PATCH 0065/3804] HID: asus: filter G713/G733 key event to prevent shutdown The G713 and G733 both emit an unexpected keycode on some key presses such as Fn+Pause. The device in this case is emitting two events on key down, and 3 on key up, the third key up event is report ID 0x02 and is unfiltered, causing incorrect event. This patch filters out the single problematic event. Signed-off-by: Luke D Jones Signed-off-by: Jiri Kosina --- drivers/hid/hid-asus.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 1ed1c05c3d542..60606c11bdaf0 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -355,6 +355,16 @@ static int asus_raw_event(struct hid_device *hdev, return -1; } } + if (drvdata->quirks & QUIRK_ROG_NKEY_KEYBOARD) { + /* + * G713 and G733 send these codes on some keypresses, depending on + * the key pressed it can trigger a shutdown event if not caught. + */ + if(data[0] == 0x02 && data[1] == 0x30) { + return -1; + } + } + } return 0; -- GitLab From 3b2520076822f15621509a6da3bc4a8636cd33b4 Mon Sep 17 00:00:00 2001 From: Saeed Mirzamohammadi Date: Thu, 29 Apr 2021 11:50:39 -0700 Subject: [PATCH 0066/3804] HID: quirks: Add quirk for Lenovo optical mouse The Lenovo optical mouse with vendor id of 0x17ef and product id of 0x600e experiences disconnecting issues every 55 seconds: [38565.706242] usb 1-1.4: Product: Lenovo Optical Mouse [38565.728603] input: Lenovo Optical Mouse as /devices/platform/scb/fd500000.pcie/pci0000:00/0000:00:00.0/0000:01:00.0/usb1/1-1/1-1.4/1-1.4:1.0/0003:17EF:600E.029A/input/input665 [38565.755949] hid-generic 0003:17EF:600E.029A: input,hidraw1: USB HID v1.11 Mouse [Lenovo Optical Mouse] on usb-0000:01:00.0-1.4/input0 [38619.360692] usb 1-1.4: USB disconnect, device number 48 [38620.864990] usb 1-1.4: new low-speed USB device number 49 using xhci_hcd [38620.984011] usb 1-1.4: New USB device found, idVendor=17ef,idProduct=600e, bcdDevice= 1.00 [38620.998117] usb 1-1.4: New USB device strings: Mfr=0, Product=2,SerialNumber=0 This adds HID_QUIRK_ALWAYS_POLL for this device in order to work properly. Signed-off-by: Saeed Mirzamohammadi Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 933b0ed9d3ed7..fad61ac349b7b 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -752,6 +752,7 @@ #define USB_DEVICE_ID_LENOVO_X1_COVER 0x6085 #define USB_DEVICE_ID_LENOVO_X1_TAB 0x60a3 #define USB_DEVICE_ID_LENOVO_X1_TAB3 0x60b5 +#define USB_DEVICE_ID_LENOVO_OPTICAL_USB_MOUSE_600E 0x600e #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D 0x608d #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019 0x6019 #define USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_602E 0x602e diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index d2933f2ffec58..51b39bda9a9d2 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -110,6 +110,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_PENSKETCH_M912), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_KYE_EASYPEN_M406XE), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_KYE, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE_ID2), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_OPTICAL_USB_MOUSE_600E), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_608D), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_6019), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LENOVO, USB_DEVICE_ID_LENOVO_PIXART_USB_MOUSE_602E), HID_QUIRK_ALWAYS_POLL }, -- GitLab From 670a23111e720dd50b07c25437b480f1bdfecc78 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Wed, 28 Apr 2021 20:05:14 -0400 Subject: [PATCH 0067/3804] HID: remove the unnecessary redefinition of a macro USB_VENDOR_ID_CORSAIR is defined twice in the same file with the same value. Signed-off-by: Hamza Mahfooz Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index fad61ac349b7b..b84a0a11e05bf 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -300,8 +300,6 @@ #define USB_VENDOR_ID_CORSAIR 0x1b1c #define USB_DEVICE_ID_CORSAIR_K90 0x1b02 - -#define USB_VENDOR_ID_CORSAIR 0x1b1c #define USB_DEVICE_ID_CORSAIR_K70R 0x1b09 #define USB_DEVICE_ID_CORSAIR_K95RGB 0x1b11 #define USB_DEVICE_ID_CORSAIR_M65RGB 0x1b12 -- GitLab From a2353e3b26012ff43bcdf81d37a3eaddd7ecdbf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahelenia=20Ziemia=C5=84ska?= Date: Mon, 8 Mar 2021 18:42:03 +0100 Subject: [PATCH 0068/3804] HID: multitouch: require Finger field to mark Win8 reports as MT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This effectively changes collection_is_mt from contact ID in report->field to (device is Win8 => collection is finger) && contact ID in report->field Some devices erroneously report Pen for fingers, and Win8 stylus-on-touchscreen devices report contact ID, but mark the accompanying touchscreen device's collection correctly Cc: stable@vger.kernel.org Signed-off-by: Ahelenia Ziemiańska Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 9d9f3e1bd5f41..55dcb8536286b 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -604,9 +604,13 @@ static struct mt_report_data *mt_allocate_report_data(struct mt_device *td, if (!(HID_MAIN_ITEM_VARIABLE & field->flags)) continue; - for (n = 0; n < field->report_count; n++) { - if (field->usage[n].hid == HID_DG_CONTACTID) - rdata->is_mt_collection = true; + if (field->logical == HID_DG_FINGER || td->hdev->group != HID_GROUP_MULTITOUCH_WIN_8) { + for (n = 0; n < field->report_count; n++) { + if (field->usage[n].hid == HID_DG_CONTACTID) { + rdata->is_mt_collection = true; + break; + } + } } } -- GitLab From bc8b796f618c3ccb0a2a8ed1e96c00a1a7849415 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ahelenia=20Ziemia=C5=84ska?= Date: Mon, 8 Mar 2021 18:42:08 +0100 Subject: [PATCH 0069/3804] HID: multitouch: set Stylus suffix for Stylus-application devices, too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This re-adds the suffix to Win8 stylus-on-touchscreen devices, now that they aren't erroneously marked as MT Signed-off-by: Ahelenia Ziemiańska Signed-off-by: Jiri Kosina --- drivers/hid/hid-multitouch.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 55dcb8536286b..eed81bdc2e869 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1580,13 +1580,13 @@ static int mt_input_configured(struct hid_device *hdev, struct hid_input *hi) /* we do not set suffix = "Touchscreen" */ hi->input->name = hdev->name; break; - case HID_DG_STYLUS: - /* force BTN_STYLUS to allow tablet matching in udev */ - __set_bit(BTN_STYLUS, hi->input->keybit); - break; case HID_VD_ASUS_CUSTOM_MEDIA_KEYS: suffix = "Custom Media Keys"; break; + case HID_DG_STYLUS: + /* force BTN_STYLUS to allow tablet matching in udev */ + __set_bit(BTN_STYLUS, hi->input->keybit); + fallthrough; case HID_DG_PEN: suffix = "Stylus"; break; -- GitLab From 48e33befe61a7d407753c53d1a06fc8d6b5dab80 Mon Sep 17 00:00:00 2001 From: Mark Bolhuis Date: Mon, 3 May 2021 17:39:38 +0100 Subject: [PATCH 0070/3804] HID: Add BUS_VIRTUAL to hid_connect logging Add BUS_VIRTUAL to hid_connect logging since it's a valid hid bus type and it should not print Signed-off-by: Mark Bolhuis Signed-off-by: Jiri Kosina --- drivers/hid/hid-core.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 0ae9f6df59d10..265cbe592374c 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -2005,6 +2005,9 @@ int hid_connect(struct hid_device *hdev, unsigned int connect_mask) case BUS_I2C: bus = "I2C"; break; + case BUS_VIRTUAL: + bus = "VIRTUAL"; + break; default: bus = ""; } -- GitLab From 682ae59ca2876f83396ccc5674235da99beed06c Mon Sep 17 00:00:00 2001 From: Shuming Fan Date: Tue, 4 May 2021 18:04:24 +0800 Subject: [PATCH 0071/3804] ASoC: rt711-sdca: fix the function number of SDCA control for feature unit 0x1E The function number should be FUNC_NUM_MIC_ARRAY(0x2) for the feature unit 0x1E. Fixes: ca5118c0c00f6 ('ASoC: rt711-sdca: change capture switch controls') Signed-off-by: Shuming Fan Link: https://lore.kernel.org/r/20210504100424.8760-1-shumingf@realtek.com Signed-off-by: Mark Brown --- sound/soc/codecs/rt711-sdca.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/rt711-sdca.c b/sound/soc/codecs/rt711-sdca.c index cc36739f7fcfb..24a084e0b48a1 100644 --- a/sound/soc/codecs/rt711-sdca.c +++ b/sound/soc/codecs/rt711-sdca.c @@ -683,13 +683,13 @@ static int rt711_sdca_set_fu1e_capture_ctl(struct rt711_sdca_priv *rt711) ch_r = (rt711->fu1e_dapm_mute || rt711->fu1e_mixer_r_mute) ? 0x01 : 0x00; err = regmap_write(rt711->regmap, - SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT711_SDCA_ENT_USER_FU1E, + SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT711_SDCA_ENT_USER_FU1E, RT711_SDCA_CTL_FU_MUTE, CH_L), ch_l); if (err < 0) return err; err = regmap_write(rt711->regmap, - SDW_SDCA_CTL(FUNC_NUM_JACK_CODEC, RT711_SDCA_ENT_USER_FU1E, + SDW_SDCA_CTL(FUNC_NUM_MIC_ARRAY, RT711_SDCA_ENT_USER_FU1E, RT711_SDCA_CTL_FU_MUTE, CH_R), ch_r); if (err < 0) return err; -- GitLab From 6be388f4a35d2ce5ef7dbf635a8964a5da7f799f Mon Sep 17 00:00:00 2001 From: Anirudh Rayabharam Date: Sun, 25 Apr 2021 23:03:53 +0530 Subject: [PATCH 0072/3804] HID: usbhid: fix info leak in hid_submit_ctrl In hid_submit_ctrl(), the way of calculating the report length doesn't take into account that report->size can be zero. When running the syzkaller reproducer, a report of size 0 causes hid_submit_ctrl) to calculate transfer_buffer_length as 16384. When this urb is passed to the usb core layer, KMSAN reports an info leak of 16384 bytes. To fix this, first modify hid_report_len() to account for the zero report size case by using DIV_ROUND_UP for the division. Then, call it from hid_submit_ctrl(). Reported-by: syzbot+7c2bb71996f95a82524c@syzkaller.appspotmail.com Signed-off-by: Anirudh Rayabharam Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hid-core.c | 2 +- include/linux/hid.h | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 86257ce6d6198..4e9077363c962 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -374,7 +374,7 @@ static int hid_submit_ctrl(struct hid_device *hid) raw_report = usbhid->ctrl[usbhid->ctrltail].raw_report; dir = usbhid->ctrl[usbhid->ctrltail].dir; - len = ((report->size - 1) >> 3) + 1 + (report->id > 0); + len = hid_report_len(report); if (dir == USB_DIR_OUT) { usbhid->urbctrl->pipe = usb_sndctrlpipe(hid_to_usb_dev(hid), 0); usbhid->urbctrl->transfer_buffer_length = len; diff --git a/include/linux/hid.h b/include/linux/hid.h index 271021e20a3f8..10e922cee4ebb 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -1167,8 +1167,7 @@ static inline void hid_hw_wait(struct hid_device *hdev) */ static inline u32 hid_report_len(struct hid_report *report) { - /* equivalent to DIV_ROUND_UP(report->size, 8) + !!(report->id > 0) */ - return ((report->size - 1) >> 3) + 1 + (report->id > 0); + return DIV_ROUND_UP(report->size, 8) + (report->id > 0); } int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, u32 size, -- GitLab From 9683e5775c75097c46bd24e65411b16ac6c6cbb3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 4 May 2021 16:49:10 -0700 Subject: [PATCH 0073/3804] libbpf: Add NULL check to add_dummy_ksym_var Avoids a segv if btf isn't present. Seen on the call path __bpf_object__open calling bpf_object__collect_externs. Fixes: 5bd022ec01f0 (libbpf: Support extern kernel function) Suggested-by: Stanislav Fomichev Suggested-by: Petar Penkov Signed-off-by: Ian Rogers Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210504234910.976501-1-irogers@google.com --- tools/lib/bpf/libbpf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e2a3cf4378140..c41d9b2b59ace 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3216,6 +3216,9 @@ static int add_dummy_ksym_var(struct btf *btf) const struct btf_var_secinfo *vs; const struct btf_type *sec; + if (!btf) + return 0; + sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC, BTF_KIND_DATASEC); if (sec_btf_id < 0) -- GitLab From 3b80d106e110d39d3f678954d3b55078669cf07e Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Thu, 6 May 2021 14:43:49 +0200 Subject: [PATCH 0074/3804] samples/bpf: Consider frame size in tx_only of xdpsock sample Fix the tx_only micro-benchmark in xdpsock to take frame size into consideration. It was hardcoded to the default value of frame_size which is 4K. Changing this on the command line to 2K made half of the packets illegal as they were outside the umem and were therefore discarded by the kernel. Fixes: 46738f73ea4f ("samples/bpf: add use of need_wakeup flag in xdpsock") Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann Acked-by: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20210506124349.6666-1-magnus.karlsson@gmail.com --- samples/bpf/xdpsock_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index aa696854be787..53e300f860bb4 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1255,7 +1255,7 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) for (i = 0; i < batch_size; i++) { struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); - tx_desc->addr = (*frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; + tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size; tx_desc->len = PKT_SIZE; } -- GitLab From d9aa6571b28ba0022de1e48801ff03a1854c7ef2 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Wed, 21 Apr 2021 16:37:35 -0700 Subject: [PATCH 0075/3804] drm/msm/dp: check sink_count before update is_connected status Link status is different from display connected status in the case of something like an Apple dongle where the type-c plug can be connected, and therefore the link is connected, but no sink is connected until an HDMI cable is plugged into the dongle. The sink_count of DPCD of dongle will increase to 1 once an HDMI cable is plugged into the dongle so that display connected status will become true. This checking also apply at pm_resume. Changes in v4: -- none Fixes: 94e58e2d06e3 ("drm/msm/dp: reset dp controller only at boot up and pm_resume") Reported-by: Stephen Boyd Reviewed-by: Stephen Boyd Tested-by: Stephen Boyd Signed-off-by: Kuogee Hsieh Fixes: 8ede2ecc3e5e ("drm/msm/dp: Add DP compliance tests on Snapdragon Chipsets") Link: https://lore.kernel.org/r/1619048258-8717-2-git-send-email-khsieh@codeaurora.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_display.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 5a39da6e1eaf2..0ba71c7a8dd4d 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -586,10 +586,8 @@ static int dp_connect_pending_timeout(struct dp_display_private *dp, u32 data) mutex_lock(&dp->event_mutex); state = dp->hpd_state; - if (state == ST_CONNECT_PENDING) { - dp_display_enable(dp, 0); + if (state == ST_CONNECT_PENDING) dp->hpd_state = ST_CONNECTED; - } mutex_unlock(&dp->event_mutex); @@ -669,10 +667,8 @@ static int dp_disconnect_pending_timeout(struct dp_display_private *dp, u32 data mutex_lock(&dp->event_mutex); state = dp->hpd_state; - if (state == ST_DISCONNECT_PENDING) { - dp_display_disable(dp, 0); + if (state == ST_DISCONNECT_PENDING) dp->hpd_state = ST_DISCONNECTED; - } mutex_unlock(&dp->event_mutex); @@ -1272,7 +1268,12 @@ static int dp_pm_resume(struct device *dev) status = dp_catalog_link_is_connected(dp->catalog); - if (status) + /* + * can not declared display is connected unless + * HDMI cable is plugged in and sink_count of + * dongle become 1 + */ + if (status && dp->link->sink_count) dp->dp_display.is_connected = true; else dp->dp_display.is_connected = false; -- GitLab From f2f46b878777e0d3f885c7ddad48f477b4dea247 Mon Sep 17 00:00:00 2001 From: Kuogee Hsieh Date: Wed, 21 Apr 2021 16:37:36 -0700 Subject: [PATCH 0076/3804] drm/msm/dp: initialize audio_comp when audio starts Initialize audio_comp when audio starts and wait for audio_comp at dp_display_disable(). This will take care of both dongle unplugged and display off (suspend) cases. Changes in v2: -- add dp_display_signal_audio_start() Changes in v3: -- restore dp_display_handle_plugged_change() at dp_hpd_unplug_handle(). Changes in v4: -- none Signed-off-by: Kuogee Hsieh Reviewed-by: Stephen Boyd Tested-by: Stephen Boyd Fixes: c703d5789590 ("drm/msm/dp: trigger unplug event in msm_dp_display_disable") Link: https://lore.kernel.org/r/1619048258-8717-3-git-send-email-khsieh@codeaurora.org Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/dp/dp_audio.c | 1 + drivers/gpu/drm/msm/dp/dp_display.c | 11 +++++++++-- drivers/gpu/drm/msm/dp/dp_display.h | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/msm/dp/dp_audio.c b/drivers/gpu/drm/msm/dp/dp_audio.c index 82a8673ab8daf..d7e4a39a904e2 100644 --- a/drivers/gpu/drm/msm/dp/dp_audio.c +++ b/drivers/gpu/drm/msm/dp/dp_audio.c @@ -527,6 +527,7 @@ int dp_audio_hw_params(struct device *dev, dp_audio_setup_acr(audio); dp_audio_safe_to_exit_level(audio); dp_audio_enable(audio, true); + dp_display_signal_audio_start(dp_display); dp_display->audio_enabled = true; end: diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 0ba71c7a8dd4d..1784e119269b7 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -178,6 +178,15 @@ static int dp_del_event(struct dp_display_private *dp_priv, u32 event) return 0; } +void dp_display_signal_audio_start(struct msm_dp *dp_display) +{ + struct dp_display_private *dp; + + dp = container_of(dp_display, struct dp_display_private, dp_display); + + reinit_completion(&dp->audio_comp); +} + void dp_display_signal_audio_complete(struct msm_dp *dp_display) { struct dp_display_private *dp; @@ -649,7 +658,6 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) dp_add_event(dp, EV_DISCONNECT_PENDING_TIMEOUT, 0, DP_TIMEOUT_5_SECOND); /* signal the disconnect event early to ensure proper teardown */ - reinit_completion(&dp->audio_comp); dp_display_handle_plugged_change(g_dp_display, false); dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_PLUG_INT_MASK | @@ -894,7 +902,6 @@ static int dp_display_disable(struct dp_display_private *dp, u32 data) /* wait only if audio was enabled */ if (dp_display->audio_enabled) { /* signal the disconnect event */ - reinit_completion(&dp->audio_comp); dp_display_handle_plugged_change(dp_display, false); if (!wait_for_completion_timeout(&dp->audio_comp, HZ * 5)) diff --git a/drivers/gpu/drm/msm/dp/dp_display.h b/drivers/gpu/drm/msm/dp/dp_display.h index 6092ba1ed85ed..5173c89eedf7e 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.h +++ b/drivers/gpu/drm/msm/dp/dp_display.h @@ -34,6 +34,7 @@ int dp_display_get_modes(struct msm_dp *dp_display, int dp_display_request_irq(struct msm_dp *dp_display); bool dp_display_check_video_test(struct msm_dp *dp_display); int dp_display_get_test_bpp(struct msm_dp *dp_display); +void dp_display_signal_audio_start(struct msm_dp *dp_display); void dp_display_signal_audio_complete(struct msm_dp *dp_display); #endif /* _DP_DISPLAY_H_ */ -- GitLab From 31379397dcc364a59ce764fabb131b645c43e340 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 5 May 2021 15:25:29 +0200 Subject: [PATCH 0077/3804] bpf: Forbid trampoline attach for functions with variable arguments We can't currently allow to attach functions with variable arguments. The problem is that we should save all the registers for arguments, which is probably doable, but if caller uses more than 6 arguments, we need stack data, which will be wrong, because of the extra stack frame we do in bpf trampoline, so we could crash. Also currently there's malformed trampoline code generated for such functions at the moment as described in: https://lore.kernel.org/bpf/20210429212834.82621-1-jolsa@kernel.org/ Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210505132529.401047-1-jolsa@kernel.org --- kernel/bpf/btf.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 0600ed325fa0b..f982a9f0dbc46 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5206,6 +5206,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, m->ret_size = ret; for (i = 0; i < nargs; i++) { + if (i == nargs - 1 && args[i].type == 0) { + bpf_log(log, + "The function %s with variable args is unsupported.\n", + tname); + return -EINVAL; + } ret = __get_type_size(btf, args[i].type, &t); if (ret < 0) { bpf_log(log, @@ -5213,6 +5219,12 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, tname, i, btf_kind_str[BTF_INFO_KIND(t->info)]); return -EINVAL; } + if (ret == 0) { + bpf_log(log, + "The function %s has malformed void argument.\n", + tname); + return -EINVAL; + } m->arg_size[i] = ret; } m->nr_args = nargs; -- GitLab From 8822702f6e4c8917c83ba79e0ebf2c8c218910d4 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Fri, 7 May 2021 10:44:52 +0800 Subject: [PATCH 0078/3804] ALSA: hda/realtek: reset eapd coeff to default value for alc287 Ubuntu users reported an audio bug on the Lenovo Yoga Slim 7 14IIL05, he installed dual OS (Windows + Linux), if he booted to the Linux from Windows, the Speaker can't work well, it has crackling noise, if he poweroff the machine first after Windows, the Speaker worked well. Before rebooting or shutdown from Windows, the Windows changes the codec eapd coeff value, but the BIOS doesn't re-initialize its value, when booting into the Linux from Windows, the eapd coeff value is not correct. To fix it, set the codec default value to that coeff register in the alsa driver. BugLink: http://bugs.launchpad.net/bugs/1925057 Suggested-by: Kailang Yang Cc: Signed-off-by: Hui Wang Link: https://lore.kernel.org/r/20210507024452.8300-1-hui.wang@canonical.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6d58f24c9702f..a5f3e78ec04e7 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -395,7 +395,6 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0282: case 0x10ec0283: case 0x10ec0286: - case 0x10ec0287: case 0x10ec0288: case 0x10ec0285: case 0x10ec0298: @@ -406,6 +405,10 @@ static void alc_fill_eapd_coef(struct hda_codec *codec) case 0x10ec0275: alc_update_coef_idx(codec, 0xe, 0, 1<<0); break; + case 0x10ec0287: + alc_update_coef_idx(codec, 0x10, 1<<9, 0); + alc_write_coef_idx(codec, 0x8, 0x4ab7); + break; case 0x10ec0293: alc_update_coef_idx(codec, 0xa, 1<<13, 0); break; -- GitLab From 8b79feffeca28c5459458fe78676b081e87c93a4 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 14 Apr 2021 14:35:41 +0200 Subject: [PATCH 0079/3804] x86/kvm: Teardown PV features on boot CPU as well Various PV features (Async PF, PV EOI, steal time) work through memory shared with hypervisor and when we restore from hibernation we must properly teardown all these features to make sure hypervisor doesn't write to stale locations after we jump to the previously hibernated kernel (which can try to place anything there). For secondary CPUs the job is already done by kvm_cpu_down_prepare(), register syscore ops to do the same for boot CPU. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210414123544.1060604-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 56 ++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index dc440bb692223..9d5f96321c7f9 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -451,6 +452,25 @@ static void __init sev_map_percpu_data(void) } } +static void kvm_guest_cpu_offline(void) +{ + kvm_disable_steal_time(); + if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) + wrmsrl(MSR_KVM_PV_EOI_EN, 0); + kvm_pv_disable_apf(); + apf_task_wake_all(); +} + +static int kvm_cpu_online(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + kvm_guest_cpu_init(); + local_irq_restore(flags); + return 0; +} + #ifdef CONFIG_SMP static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask); @@ -635,32 +655,34 @@ static void __init kvm_smp_prepare_boot_cpu(void) kvm_spinlock_init(); } -static void kvm_guest_cpu_offline(void) +static int kvm_cpu_down_prepare(unsigned int cpu) { - kvm_disable_steal_time(); - if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) - wrmsrl(MSR_KVM_PV_EOI_EN, 0); - kvm_pv_disable_apf(); - apf_task_wake_all(); -} + unsigned long flags; -static int kvm_cpu_online(unsigned int cpu) -{ - local_irq_disable(); - kvm_guest_cpu_init(); - local_irq_enable(); + local_irq_save(flags); + kvm_guest_cpu_offline(); + local_irq_restore(flags); return 0; } -static int kvm_cpu_down_prepare(unsigned int cpu) +#endif + +static int kvm_suspend(void) { - local_irq_disable(); kvm_guest_cpu_offline(); - local_irq_enable(); + return 0; } -#endif +static void kvm_resume(void) +{ + kvm_cpu_online(raw_smp_processor_id()); +} + +static struct syscore_ops kvm_syscore_ops = { + .suspend = kvm_suspend, + .resume = kvm_resume, +}; static void __init kvm_guest_init(void) { @@ -704,6 +726,8 @@ static void __init kvm_guest_init(void) kvm_guest_cpu_init(); #endif + register_syscore_ops(&kvm_syscore_ops); + /* * Hard lockup detection is enabled by default. Disable it, as guests * can get false positives too easily, for example if the host is -- GitLab From c02027b5742b5aa804ef08a4a9db433295533046 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 14 Apr 2021 14:35:42 +0200 Subject: [PATCH 0080/3804] x86/kvm: Disable kvmclock on all CPUs on shutdown Currenly, we disable kvmclock from machine_shutdown() hook and this only happens for boot CPU. We need to disable it for all CPUs to guard against memory corruption e.g. on restore from hibernate. Note, writing '0' to kvmclock MSR doesn't clear memory location, it just prevents hypervisor from updating the location so for the short while after write and while CPU is still alive, the clock remains usable and correct so we don't need to switch to some other clocksource. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210414123544.1060604-4-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_para.h | 4 ++-- arch/x86/kernel/kvm.c | 1 + arch/x86/kernel/kvmclock.c | 5 +---- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 3381198525126..9c56e0defd453 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -7,8 +7,6 @@ #include #include -extern void kvmclock_init(void); - #ifdef CONFIG_KVM_GUEST bool kvm_check_and_clear_guest_paused(void); #else @@ -86,6 +84,8 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, } #ifdef CONFIG_KVM_GUEST +void kvmclock_init(void); +void kvmclock_disable(void); bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); unsigned int kvm_arch_para_hints(void); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 9d5f96321c7f9..25dd126a33250 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -459,6 +459,7 @@ static void kvm_guest_cpu_offline(void) wrmsrl(MSR_KVM_PV_EOI_EN, 0); kvm_pv_disable_apf(); apf_task_wake_all(); + kvmclock_disable(); } static int kvm_cpu_online(unsigned int cpu) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index d37ed4e1d0338..081686df6cd8a 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -220,11 +220,9 @@ static void kvm_crash_shutdown(struct pt_regs *regs) } #endif -static void kvm_shutdown(void) +void kvmclock_disable(void) { native_write_msr(msr_kvm_system_time, 0, 0); - kvm_disable_steal_time(); - native_machine_shutdown(); } static void __init kvmclock_init_mem(void) @@ -351,7 +349,6 @@ void __init kvmclock_init(void) #endif x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; - machine_ops.shutdown = kvm_shutdown; #ifdef CONFIG_KEXEC_CORE machine_ops.crash_shutdown = kvm_crash_shutdown; #endif -- GitLab From 3d6b84132d2a57b5a74100f6923a8feb679ac2ce Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 14 Apr 2021 14:35:43 +0200 Subject: [PATCH 0081/3804] x86/kvm: Disable all PV features on crash Crash shutdown handler only disables kvmclock and steal time, other PV features remain active so we risk corrupting memory or getting some side-effects in kdump kernel. Move crash handler to kvm.c and unify with CPU offline. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210414123544.1060604-5-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_para.h | 6 ----- arch/x86/kernel/kvm.c | 44 ++++++++++++++++++++++++--------- arch/x86/kernel/kvmclock.c | 21 ---------------- 3 files changed, 32 insertions(+), 39 deletions(-) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 9c56e0defd453..69299878b200a 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -92,7 +92,6 @@ unsigned int kvm_arch_para_hints(void); void kvm_async_pf_task_wait_schedule(u32 token); void kvm_async_pf_task_wake(u32 token); u32 kvm_read_and_reset_apf_flags(void); -void kvm_disable_steal_time(void); bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token); DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled); @@ -137,11 +136,6 @@ static inline u32 kvm_read_and_reset_apf_flags(void) return 0; } -static inline void kvm_disable_steal_time(void) -{ - return; -} - static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) { return false; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 25dd126a33250..8eb91dc0f5a87 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -38,6 +38,7 @@ #include #include #include +#include #include DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled); @@ -375,6 +376,14 @@ static void kvm_pv_disable_apf(void) pr_info("disable async PF for cpu %d\n", smp_processor_id()); } +static void kvm_disable_steal_time(void) +{ + if (!has_steal_clock) + return; + + wrmsr(MSR_KVM_STEAL_TIME, 0, 0); +} + static void kvm_pv_guest_cpu_reboot(void *unused) { /* @@ -417,14 +426,6 @@ static u64 kvm_steal_clock(int cpu) return steal; } -void kvm_disable_steal_time(void) -{ - if (!has_steal_clock) - return; - - wrmsr(MSR_KVM_STEAL_TIME, 0, 0); -} - static inline void __set_percpu_decrypted(void *ptr, unsigned long size) { early_set_memory_decrypted((unsigned long) ptr, size); @@ -452,13 +453,14 @@ static void __init sev_map_percpu_data(void) } } -static void kvm_guest_cpu_offline(void) +static void kvm_guest_cpu_offline(bool shutdown) { kvm_disable_steal_time(); if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) wrmsrl(MSR_KVM_PV_EOI_EN, 0); kvm_pv_disable_apf(); - apf_task_wake_all(); + if (!shutdown) + apf_task_wake_all(); kvmclock_disable(); } @@ -661,7 +663,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu) unsigned long flags; local_irq_save(flags); - kvm_guest_cpu_offline(); + kvm_guest_cpu_offline(false); local_irq_restore(flags); return 0; } @@ -670,7 +672,7 @@ static int kvm_cpu_down_prepare(unsigned int cpu) static int kvm_suspend(void) { - kvm_guest_cpu_offline(); + kvm_guest_cpu_offline(false); return 0; } @@ -685,6 +687,20 @@ static struct syscore_ops kvm_syscore_ops = { .resume = kvm_resume, }; +/* + * After a PV feature is registered, the host will keep writing to the + * registered memory location. If the guest happens to shutdown, this memory + * won't be valid. In cases like kexec, in which you install a new kernel, this + * means a random memory location will be kept being written. + */ +#ifdef CONFIG_KEXEC_CORE +static void kvm_crash_shutdown(struct pt_regs *regs) +{ + kvm_guest_cpu_offline(true); + native_machine_crash_shutdown(regs); +} +#endif + static void __init kvm_guest_init(void) { int i; @@ -727,6 +743,10 @@ static void __init kvm_guest_init(void) kvm_guest_cpu_init(); #endif +#ifdef CONFIG_KEXEC_CORE + machine_ops.crash_shutdown = kvm_crash_shutdown; +#endif + register_syscore_ops(&kvm_syscore_ops); /* diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 081686df6cd8a..ad273e5861c1b 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -20,7 +20,6 @@ #include #include #include -#include #include static int kvmclock __initdata = 1; @@ -203,23 +202,6 @@ static void kvm_setup_secondary_clock(void) } #endif -/* - * After the clock is registered, the host will keep writing to the - * registered memory location. If the guest happens to shutdown, this memory - * won't be valid. In cases like kexec, in which you install a new kernel, this - * means a random memory location will be kept being written. So before any - * kind of shutdown from our side, we unregister the clock by writing anything - * that does not have the 'enable' bit set in the msr - */ -#ifdef CONFIG_KEXEC_CORE -static void kvm_crash_shutdown(struct pt_regs *regs) -{ - native_write_msr(msr_kvm_system_time, 0, 0); - kvm_disable_steal_time(); - native_machine_crash_shutdown(regs); -} -#endif - void kvmclock_disable(void) { native_write_msr(msr_kvm_system_time, 0, 0); @@ -349,9 +331,6 @@ void __init kvmclock_init(void) #endif x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; -#ifdef CONFIG_KEXEC_CORE - machine_ops.crash_shutdown = kvm_crash_shutdown; -#endif kvm_get_preset_lpj(); /* -- GitLab From 384fc672f528d3b84eacd9a86ecf35df3363b8ba Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 14 Apr 2021 14:35:44 +0200 Subject: [PATCH 0082/3804] x86/kvm: Unify kvm_pv_guest_cpu_reboot() with kvm_guest_cpu_offline() Simplify the code by making PV features shutdown happen in one place. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210414123544.1060604-6-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 8eb91dc0f5a87..a26643dc6bd63 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -384,31 +384,6 @@ static void kvm_disable_steal_time(void) wrmsr(MSR_KVM_STEAL_TIME, 0, 0); } -static void kvm_pv_guest_cpu_reboot(void *unused) -{ - /* - * We disable PV EOI before we load a new kernel by kexec, - * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory. - * New kernel can re-enable when it boots. - */ - if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) - wrmsrl(MSR_KVM_PV_EOI_EN, 0); - kvm_pv_disable_apf(); - kvm_disable_steal_time(); -} - -static int kvm_pv_reboot_notify(struct notifier_block *nb, - unsigned long code, void *unused) -{ - if (code == SYS_RESTART) - on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); - return NOTIFY_DONE; -} - -static struct notifier_block kvm_pv_reboot_nb = { - .notifier_call = kvm_pv_reboot_notify, -}; - static u64 kvm_steal_clock(int cpu) { u64 steal; @@ -687,6 +662,23 @@ static struct syscore_ops kvm_syscore_ops = { .resume = kvm_resume, }; +static void kvm_pv_guest_cpu_reboot(void *unused) +{ + kvm_guest_cpu_offline(true); +} + +static int kvm_pv_reboot_notify(struct notifier_block *nb, + unsigned long code, void *unused) +{ + if (code == SYS_RESTART) + on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1); + return NOTIFY_DONE; +} + +static struct notifier_block kvm_pv_reboot_nb = { + .notifier_call = kvm_pv_reboot_notify, +}; + /* * After a PV feature is registered, the host will keep writing to the * registered memory location. If the guest happens to shutdown, this memory -- GitLab From 46a63924b05f335b0765ad13dae4d2d7569f25c9 Mon Sep 17 00:00:00 2001 From: Siddharth Chandrasekaran Date: Mon, 3 May 2021 14:00:58 +0200 Subject: [PATCH 0083/3804] doc/kvm: Fix wrong entry for KVM_CAP_X86_MSR_FILTER The capability that exposes new ioctl KVM_X86_SET_MSR_FILTER to userspace is specified incorrectly as the ioctl itself (instead of KVM_CAP_X86_MSR_FILTER). This patch fixes it. Fixes: 1a155254ff93 ("KVM: x86: Introduce MSR filtering") Reviewed-by: Alexander Graf Signed-off-by: Siddharth Chandrasekaran Message-Id: <20210503120059.9283-1-sidcha@amazon.de> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 22d0775621496..7fcb2fd38f42e 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4803,7 +4803,7 @@ KVM_PV_VM_VERIFY 4.126 KVM_X86_SET_MSR_FILTER ---------------------------- -:Capability: KVM_X86_SET_MSR_FILTER +:Capability: KVM_CAP_X86_MSR_FILTER :Architectures: x86 :Type: vm ioctl :Parameters: struct kvm_msr_filter @@ -6715,7 +6715,7 @@ accesses that would usually trigger a #GP by KVM into the guest will instead get bounced to user space through the KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications. -8.27 KVM_X86_SET_MSR_FILTER +8.27 KVM_CAP_X86_MSR_FILTER --------------------------- :Architectures: x86 -- GitLab From f5c7e8425f18fdb9bdb7d13340651d7876890329 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 3 May 2021 17:08:51 +0200 Subject: [PATCH 0084/3804] KVM: nVMX: Always make an attempt to map eVMCS after migration When enlightened VMCS is in use and nested state is migrated with vmx_get_nested_state()/vmx_set_nested_state() KVM can't map evmcs page right away: evmcs gpa is not 'struct kvm_vmx_nested_state_hdr' and we can't read it from VP assist page because userspace may decide to restore HV_X64_MSR_VP_ASSIST_PAGE after restoring nested state (and QEMU, for example, does exactly that). To make sure eVMCS is mapped /vmx_set_nested_state() raises KVM_REQ_GET_NESTED_STATE_PAGES request. Commit f2c7ef3ba955 ("KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES on nested vmexit") added KVM_REQ_GET_NESTED_STATE_PAGES clearing to nested_vmx_vmexit() to make sure MSR permission bitmap is not switched when an immediate exit from L2 to L1 happens right after migration (caused by a pending event, for example). Unfortunately, in the exact same situation we still need to have eVMCS mapped so nested_sync_vmcs12_to_shadow() reflects changes in VMCS12 to eVMCS. As a band-aid, restore nested_get_evmcs_page() when clearing KVM_REQ_GET_NESTED_STATE_PAGES in nested_vmx_vmexit(). The 'fix' is far from being ideal as we can't easily propagate possible failures and even if we could, this is most likely already too late to do so. The whole 'KVM_REQ_GET_NESTED_STATE_PAGES' idea for mapping eVMCS after migration seems to be fragile as we diverge too much from the 'native' path when vmptr loading happens on vmx_set_nested_state(). Fixes: f2c7ef3ba955 ("KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES on nested vmexit") Signed-off-by: Vitaly Kuznetsov Message-Id: <20210503150854.1144255-2-vkuznets@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index bced766378232..6058a65a6ede6 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -3098,15 +3098,8 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu) nested_vmx_handle_enlightened_vmptrld(vcpu, false); if (evmptrld_status == EVMPTRLD_VMFAIL || - evmptrld_status == EVMPTRLD_ERROR) { - pr_debug_ratelimited("%s: enlightened vmptrld failed\n", - __func__); - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = - KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; + evmptrld_status == EVMPTRLD_ERROR) return false; - } } return true; @@ -3194,8 +3187,16 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu) { - if (!nested_get_evmcs_page(vcpu)) + if (!nested_get_evmcs_page(vcpu)) { + pr_debug_ratelimited("%s: enlightened vmptrld failed\n", + __func__); + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = + KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return false; + } if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu)) return false; @@ -4435,7 +4436,15 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, /* Similarly, triple faults in L2 should never escape. */ WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)); - kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); + if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { + /* + * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map + * Enlightened VMCS after migration and we still need to + * do that when something is forcing L2->L1 exit prior to + * the first L2 run. + */ + (void)nested_get_evmcs_page(vcpu); + } /* Service the TLB flush request for L2 before switching to L1. */ if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) -- GitLab From 32d1b3ab588c1231dbfa9eb08819c50529ce77d7 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 5 May 2021 17:18:21 +0200 Subject: [PATCH 0085/3804] KVM: selftests: evmcs_test: Check that VMLAUNCH with bogus EVMPTR is causing #UD 'run->exit_reason == KVM_EXIT_SHUTDOWN' check is not ideal as we may be getting some unexpected exception. Directly check for #UD instead. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210505151823.1341678-2-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/x86_64/evmcs_test.c | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index ca22ee6d19cbd..b01f64ac6ce30 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -19,6 +19,14 @@ #define VCPU_ID 5 +static int ud_count; + +static void guest_ud_handler(struct ex_regs *regs) +{ + ud_count++; + regs->rip += 3; /* VMLAUNCH */ +} + void l2_guest_code(void) { GUEST_SYNC(7); @@ -71,11 +79,11 @@ void guest_code(struct vmx_pages *vmx_pages) if (vmx_pages) l1_guest_code(vmx_pages); - GUEST_DONE(); - /* Try enlightened vmptrld with an incorrect GPA */ evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs); GUEST_ASSERT(vmlaunch()); + GUEST_ASSERT(ud_count == 1); + GUEST_DONE(); } int main(int argc, char *argv[]) @@ -109,6 +117,10 @@ int main(int argc, char *argv[]) vcpu_alloc_vmx(vm, &vmx_pages_gva); vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_handle_exception(vm, UD_VECTOR, guest_ud_handler); + for (stage = 1;; stage++) { _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, @@ -124,7 +136,7 @@ int main(int argc, char *argv[]) case UCALL_SYNC: break; case UCALL_DONE: - goto part1_done; + goto done; default: TEST_FAIL("Unknown ucall %lu", uc.cmd); } @@ -156,10 +168,6 @@ int main(int argc, char *argv[]) (ulong) regs2.rdi, (ulong) regs2.rsi); } -part1_done: - _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, - "Unexpected successful VMEnter with invalid eVMCS pointer!"); - +done: kvm_vm_free(vm); } -- GitLab From c9ecafaf0113a305f5085ceb9c7a4b64ca70eae9 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 5 May 2021 17:18:22 +0200 Subject: [PATCH 0086/3804] KVM: selftests: evmcs_test: Check that VMCS12 is alway properly synced to eVMCS after restore Add a test for the regression, introduced by commit f2c7ef3ba955 ("KVM: nSVM: cancel KVM_REQ_GET_NESTED_STATE_PAGES on nested vmexit"). When L2->L1 exit is forced immediately after restoring nested state, KVM_REQ_GET_NESTED_STATE_PAGES request is cleared and VMCS12 changes (e.g. fresh RIP) are not reflected to eVMCS. The consequent nested vCPU run gets broken. Utilize NMI injection to do the job. Signed-off-by: Vitaly Kuznetsov Message-Id: <20210505151823.1341678-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- .../testing/selftests/kvm/x86_64/evmcs_test.c | 66 +++++++++++++++---- 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index b01f64ac6ce30..63096cea26c61 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -18,30 +18,52 @@ #include "vmx.h" #define VCPU_ID 5 +#define NMI_VECTOR 2 static int ud_count; +void enable_x2apic(void) +{ + uint32_t spiv_reg = APIC_BASE_MSR + (APIC_SPIV >> 4); + + wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) | + MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD); + wrmsr(spiv_reg, rdmsr(spiv_reg) | APIC_SPIV_APIC_ENABLED); +} + static void guest_ud_handler(struct ex_regs *regs) { ud_count++; regs->rip += 3; /* VMLAUNCH */ } +static void guest_nmi_handler(struct ex_regs *regs) +{ +} + void l2_guest_code(void) { GUEST_SYNC(7); GUEST_SYNC(8); + /* Forced exit to L1 upon restore */ + GUEST_SYNC(9); + /* Done, exit to L1 and never come back. */ vmcall(); } -void l1_guest_code(struct vmx_pages *vmx_pages) +void guest_code(struct vmx_pages *vmx_pages) { #define L2_GUEST_STACK_SIZE 64 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + enable_x2apic(); + + GUEST_SYNC(1); + GUEST_SYNC(2); + enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist); GUEST_ASSERT(vmx_pages->vmcs_gpa); @@ -63,21 +85,22 @@ void l1_guest_code(struct vmx_pages *vmx_pages) current_evmcs->revision_id = EVMCS_VERSION; GUEST_SYNC(6); + current_evmcs->pin_based_vm_exec_control |= + PIN_BASED_NMI_EXITING; GUEST_ASSERT(!vmlaunch()); GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa); - GUEST_SYNC(9); + + /* + * NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is + * up-to-date (RIP points where it should and not at the beginning + * of l2_guest_code(). GUEST_SYNC(9) checkes that. + */ GUEST_ASSERT(!vmresume()); - GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); - GUEST_SYNC(10); -} -void guest_code(struct vmx_pages *vmx_pages) -{ - GUEST_SYNC(1); - GUEST_SYNC(2); + GUEST_SYNC(10); - if (vmx_pages) - l1_guest_code(vmx_pages); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + GUEST_SYNC(11); /* Try enlightened vmptrld with an incorrect GPA */ evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs); @@ -86,6 +109,18 @@ void guest_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } +void inject_nmi(struct kvm_vm *vm) +{ + struct kvm_vcpu_events events; + + vcpu_events_get(vm, VCPU_ID, &events); + + events.nmi.pending = 1; + events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING; + + vcpu_events_set(vm, VCPU_ID, &events); +} + int main(int argc, char *argv[]) { vm_vaddr_t vmx_pages_gva = 0; @@ -120,6 +155,9 @@ int main(int argc, char *argv[]) vm_init_descriptor_tables(vm); vcpu_init_descriptor_tables(vm, VCPU_ID); vm_handle_exception(vm, UD_VECTOR, guest_ud_handler); + vm_handle_exception(vm, NMI_VECTOR, guest_nmi_handler); + + pr_info("Running L1 which uses EVMCS to run L2\n"); for (stage = 1;; stage++) { _vcpu_run(vm, VCPU_ID); @@ -166,6 +204,12 @@ int main(int argc, char *argv[]) TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", (ulong) regs2.rdi, (ulong) regs2.rsi); + + /* Force immediate L2->L1 exit before resuming */ + if (stage == 8) { + pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n"); + inject_nmi(vm); + } } done: -- GitLab From 70f094f4f01dc4d6f78ac6407f85627293a6553c Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 3 May 2021 17:08:52 +0200 Subject: [PATCH 0087/3804] KVM: nVMX: Properly pad 'struct kvm_vmx_nested_state_hdr' Eliminate the probably unwanted hole in 'struct kvm_vmx_nested_state_hdr': Pre-patch: struct kvm_vmx_nested_state_hdr { __u64 vmxon_pa; /* 0 8 */ __u64 vmcs12_pa; /* 8 8 */ struct { __u16 flags; /* 16 2 */ } smm; /* 16 2 */ /* XXX 2 bytes hole, try to pack */ __u32 flags; /* 20 4 */ __u64 preemption_timer_deadline; /* 24 8 */ }; Post-patch: struct kvm_vmx_nested_state_hdr { __u64 vmxon_pa; /* 0 8 */ __u64 vmcs12_pa; /* 8 8 */ struct { __u16 flags; /* 16 2 */ } smm; /* 16 2 */ __u16 pad; /* 18 2 */ __u32 flags; /* 20 4 */ __u64 preemption_timer_deadline; /* 24 8 */ }; Signed-off-by: Vitaly Kuznetsov Message-Id: <20210503150854.1144255-3-vkuznets@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/kvm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 5a3022c8af82b..0662f644aad9d 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr { __u16 flags; } smm; + __u16 pad; + __u32 flags; __u64 preemption_timer_deadline; }; -- GitLab From 5f443e424efab56baa8021da04878f88eb0815d4 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 10 Dec 2020 17:23:17 -0800 Subject: [PATCH 0088/3804] selftests: kvm: remove reassignment of non-absolute variables Clang's integrated assembler does not allow symbols with non-absolute values to be reassigned. Modify the interrupt entry loop macro to be compatible with IAS by using a label and an offset. Cc: Jian Cai Signed-off-by: Bill Wendling References: https://lore.kernel.org/lkml/20200714233024.1789985-1-caij2003@gmail.com/ Message-Id: <20201211012317.3722214-1-morbo@google.com> Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/x86_64/handlers.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S index aaf7bc7d2ce18..7629819734afd 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/handlers.S +++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S @@ -54,9 +54,9 @@ idt_handlers: .align 8 /* Fetch current address and append it to idt_handlers. */ - current_handler = . +666 : .pushsection .rodata -.quad current_handler + .quad 666b .popsection .if ! \has_error -- GitLab From aca352886ebdd675b5131ed4c83bf5477eee5d72 Mon Sep 17 00:00:00 2001 From: Siddharth Chandrasekaran Date: Mon, 3 May 2021 14:21:11 +0200 Subject: [PATCH 0089/3804] KVM: x86: Hoist input checks in kvm_add_msr_filter() In ioctl KVM_X86_SET_MSR_FILTER, input from user space is validated after a memdup_user(). For invalid inputs we'd memdup and then call kfree unnecessarily. Hoist input validation to avoid kfree altogether. Signed-off-by: Siddharth Chandrasekaran Message-Id: <20210503122111.13775-1-sidcha@amazon.de> Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cebdaa1e3cf5b..102f116d9bb40 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5468,14 +5468,18 @@ static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter) static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter, struct kvm_msr_filter_range *user_range) { - struct msr_bitmap_range range; unsigned long *bitmap = NULL; size_t bitmap_size; - int r; if (!user_range->nmsrs) return 0; + if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) + return -EINVAL; + + if (!user_range->flags) + return -EINVAL; + bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long); if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE) return -EINVAL; @@ -5484,31 +5488,15 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter, if (IS_ERR(bitmap)) return PTR_ERR(bitmap); - range = (struct msr_bitmap_range) { + msr_filter->ranges[msr_filter->count] = (struct msr_bitmap_range) { .flags = user_range->flags, .base = user_range->base, .nmsrs = user_range->nmsrs, .bitmap = bitmap, }; - if (range.flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) { - r = -EINVAL; - goto err; - } - - if (!range.flags) { - r = -EINVAL; - goto err; - } - - /* Everything ok, add this range identifier. */ - msr_filter->ranges[msr_filter->count] = range; msr_filter->count++; - return 0; -err: - kfree(bitmap); - return r; } static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) -- GitLab From 063ab16c14db5a2ef52d54d0475b7fed19c982d7 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 4 May 2021 17:39:35 +0300 Subject: [PATCH 0090/3804] KVM: nSVM: always restore the L1's GIF on migration While usually the L1's GIF is set while L2 runs, and usually migration nested state is loaded after a vCPU reset which also sets L1's GIF to true, this is not guaranteed. Signed-off-by: Maxim Levitsky Message-Id: <20210504143936.1644378-2-mlevitsk@redhat.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 32400cba608d4..b331446f67f3b 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -1314,6 +1314,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, else svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save; + svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); + svm->nested.nested_run_pending = !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); -- GitLab From 809c79137a192d7e881a517f803ebbf96305f066 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Tue, 4 May 2021 17:39:36 +0300 Subject: [PATCH 0091/3804] KVM: nSVM: remove a warning about vmcb01 VM exit reason While in most cases, when returning to use the VMCB01, the exit reason stored in it will be SVM_EXIT_VMRUN, on first VM exit after a nested migration this field can contain anything since the VM entry did happen before the migration. Remove this warning to avoid the false positive. Signed-off-by: Maxim Levitsky Message-Id: <20210504143936.1644378-3-mlevitsk@redhat.com> Fixes: 9a7de6ecc3ed ("KVM: nSVM: If VMRUN is single-stepped, queue the #DB intercept in nested_svm_vmexit()") Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/nested.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index b331446f67f3b..5e8d8443154e8 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -764,7 +764,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm) nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr); svm_switch_vmcb(svm, &svm->vmcb01); - WARN_ON_ONCE(svm->vmcb->control.exit_code != SVM_EXIT_VMRUN); /* * On vmexit the GIF is set to false and -- GitLab From 8aec21c04caa2000f91cf8822ae0811e4b0c3971 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:20 -0700 Subject: [PATCH 0092/3804] KVM: VMX: Do not advertise RDPID if ENABLE_RDTSCP control is unsupported Clear KVM's RDPID capability if the ENABLE_RDTSCP secondary exec control is unsupported. Despite being enumerated in a separate CPUID flag, RDPID is bundled under the same VMCS control as RDTSCP and will #UD in VMX non-root if ENABLE_RDTSCP is not enabled. Fixes: 41cd02c6f7f6 ("kvm: x86: Expose RDPID in KVM_GET_SUPPORTED_CPUID") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-2-seanjc@google.com> Reviewed-by: Jim Mattson Reviewed-by: Reiji Watanabe Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index cbe0cdade38a5..46573b8626385 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7377,9 +7377,11 @@ static __init void vmx_set_cpu_caps(void) if (!cpu_has_vmx_xsaves()) kvm_cpu_cap_clear(X86_FEATURE_XSAVES); - /* CPUID 0x80000001 */ - if (!cpu_has_vmx_rdtscp()) + /* CPUID 0x80000001 and 0x7 (RDPID) */ + if (!cpu_has_vmx_rdtscp()) { kvm_cpu_cap_clear(X86_FEATURE_RDTSCP); + kvm_cpu_cap_clear(X86_FEATURE_RDPID); + } if (cpu_has_vmx_waitpkg()) kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG); -- GitLab From 85d0011264da24be08ae907d7f29983a597ca9b1 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:21 -0700 Subject: [PATCH 0093/3804] KVM: x86: Emulate RDPID only if RDTSCP is supported Do not advertise emulation support for RDPID if RDTSCP is unsupported. RDPID emulation subtly relies on MSR_TSC_AUX to exist in hardware, as both vmx_get_msr() and svm_get_msr() will return an error if the MSR is unsupported, i.e. ctxt->ops->get_msr() will fail and the emulator will inject a #UD. Note, RDPID emulation also relies on RDTSCP being enabled in the guest, but this is a KVM bug and will eventually be fixed. Fixes: fb6d4d340e05 ("KVM: x86: emulate RDPID") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-3-seanjc@google.com> Reviewed-by: Jim Mattson Reviewed-by: Reiji Watanabe Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 19606a3418889..c0e8c5e921898 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -637,7 +637,8 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func) case 7: entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; entry->eax = 0; - entry->ecx = F(RDPID); + if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) + entry->ecx = F(RDPID); ++array->nent; default: break; -- GitLab From 3b195ac9260235624b1c18f7bdaef184479c1d41 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:22 -0700 Subject: [PATCH 0094/3804] KVM: SVM: Inject #UD on RDTSCP when it should be disabled in the guest Intercept RDTSCP to inject #UD if RDTSC is disabled in the guest. Note, SVM does not support intercepting RDPID. Unlike VMX's ENABLE_RDTSCP control, RDTSCP interception does not apply to RDPID. This is a benign virtualization hole as the host kernel (incorrectly) sets MSR_TSC_AUX if RDTSCP is supported, and KVM loads the guest's MSR_TSC_AUX into hardware if RDTSCP is supported in the host, i.e. KVM will not leak the host's MSR_TSC_AUX to the guest. But, when the kernel bug is fixed, KVM will start leaking the host's MSR_TSC_AUX if RDPID is supported in hardware, but RDTSCP isn't available for whatever reason. This leak will be remedied in a future commit. Fixes: 46896c73c1a4 ("KVM: svm: add support for RDTSCP") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-4-seanjc@google.com> Reviewed-by: Jim Mattson Reviewed-by: Reiji Watanabe Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index be5cf612ab1fe..ebcb5849d69b1 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1100,7 +1100,9 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) return svm->vmcb->control.tsc_offset; } -static void svm_check_invpcid(struct vcpu_svm *svm) +/* Evaluate instruction intercepts that depend on guest CPUID features. */ +static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu, + struct vcpu_svm *svm) { /* * Intercept INVPCID if shadow paging is enabled to sync/free shadow @@ -1113,6 +1115,13 @@ static void svm_check_invpcid(struct vcpu_svm *svm) else svm_clr_intercept(svm, INTERCEPT_INVPCID); } + + if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) { + if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) + svm_clr_intercept(svm, INTERCEPT_RDTSCP); + else + svm_set_intercept(svm, INTERCEPT_RDTSCP); + } } static void init_vmcb(struct kvm_vcpu *vcpu) @@ -1248,7 +1257,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu) svm_clr_intercept(svm, INTERCEPT_PAUSE); } - svm_check_invpcid(svm); + svm_recalc_instruction_intercepts(vcpu, svm); /* * If the host supports V_SPEC_CTRL then disable the interception @@ -3084,6 +3093,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = { [SVM_EXIT_STGI] = stgi_interception, [SVM_EXIT_CLGI] = clgi_interception, [SVM_EXIT_SKINIT] = skinit_interception, + [SVM_EXIT_RDTSCP] = kvm_handle_invalid_op, [SVM_EXIT_WBINVD] = kvm_emulate_wbinvd, [SVM_EXIT_MONITOR] = kvm_emulate_monitor, [SVM_EXIT_MWAIT] = kvm_emulate_mwait, @@ -4007,8 +4017,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) && guest_cpuid_has(vcpu, X86_FEATURE_NRIPS); - /* Check again if INVPCID interception if required */ - svm_check_invpcid(svm); + svm_recalc_instruction_intercepts(vcpu, svm); /* For sev guests, the memory encryption bit is not reserved in CR3. */ if (sev_guest(vcpu->kvm)) { -- GitLab From 2183de4161b90bd3851ccd3910c87b2c9adfc6ed Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:23 -0700 Subject: [PATCH 0095/3804] KVM: x86: Move RDPID emulation intercept to its own enum Add a dedicated intercept enum for RDPID instead of piggybacking RDTSCP. Unlike VMX's ENABLE_RDTSCP, RDPID is not bound to SVM's RDTSCP intercept. Fixes: fb6d4d340e05 ("KVM: x86: emulate RDPID") Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-5-seanjc@google.com> Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 2 +- arch/x86/kvm/kvm_emulate.h | 1 + arch/x86/kvm/vmx/vmx.c | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 77e1c89a95a7f..8a0ccdb560766 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4502,7 +4502,7 @@ static const struct opcode group8[] = { * from the register case of group9. */ static const struct gprefix pfx_0f_c7_7 = { - N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp), + N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid), }; diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 0d359115429ad..f016838faedd6 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -468,6 +468,7 @@ enum x86_intercept { x86_intercept_clgi, x86_intercept_skinit, x86_intercept_rdtscp, + x86_intercept_rdpid, x86_intercept_icebp, x86_intercept_wbinvd, x86_intercept_monitor, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 46573b8626385..4a625c7482756 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7437,8 +7437,9 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu, /* * RDPID causes #UD if disabled through secondary execution controls. * Because it is marked as EmulateOnUD, we need to intercept it here. + * Note, RDPID is hidden behind ENABLE_RDTSCP. */ - case x86_intercept_rdtscp: + case x86_intercept_rdpid: if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) { exception->vector = UD_VECTOR; exception->error_code_valid = false; -- GitLab From 5104d7ffcf24749939bea7fdb5378d186473f890 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:24 -0700 Subject: [PATCH 0096/3804] KVM: VMX: Disable preemption when probing user return MSRs Disable preemption when probing a user return MSR via RDSMR/WRMSR. If the MSR holds a different value per logical CPU, the WRMSR could corrupt the host's value if KVM is preempted between the RDMSR and WRMSR, and then rescheduled on a different CPU. Opportunistically land the helper in common x86, SVM will use the helper in a future commit. Fixes: 4be534102624 ("KVM: VMX: Initialize vmx->guest_msrs[] right after allocation") Cc: stable@vger.kernel.org Cc: Xiaoyao Li Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-6-seanjc@google.com> Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx/vmx.c | 5 +---- arch/x86/kvm/x86.c | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 848956bb3cf1d..e8dcbc632cf8e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1777,6 +1777,7 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long icr, int op_64_bit); void kvm_define_user_return_msr(unsigned index, u32 msr); +int kvm_probe_user_return_msr(u32 msr); int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 4a625c7482756..11ff9c3d95d53 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -6914,12 +6914,9 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) { u32 index = vmx_uret_msrs_list[i]; - u32 data_low, data_high; int j = vmx->nr_uret_msrs; - if (rdmsr_safe(index, &data_low, &data_high) < 0) - continue; - if (wrmsr_safe(index, data_low, data_high) < 0) + if (kvm_probe_user_return_msr(index)) continue; vmx->guest_uret_msrs[j].slot = i; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 102f116d9bb40..bd90c73c37b4d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -339,6 +339,22 @@ static void kvm_on_user_return(struct user_return_notifier *urn) } } +int kvm_probe_user_return_msr(u32 msr) +{ + u64 val; + int ret; + + preempt_disable(); + ret = rdmsrl_safe(msr, &val); + if (ret) + goto out; + ret = wrmsrl_safe(msr, val); +out: + preempt_enable(); + return ret; +} +EXPORT_SYMBOL_GPL(kvm_probe_user_return_msr); + void kvm_define_user_return_msr(unsigned slot, u32 msr) { BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS); -- GitLab From 0caa0a77c2f6fcd0830cdcd018db1af98fe35e28 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:25 -0700 Subject: [PATCH 0097/3804] KVM: SVM: Probe and load MSR_TSC_AUX regardless of RDTSCP support in host Probe MSR_TSC_AUX whether or not RDTSCP is supported in the host, and if probing succeeds, load the guest's MSR_TSC_AUX into hardware prior to VMRUN. Because SVM doesn't support interception of RDPID, RDPID cannot be disallowed in the guest (without resorting to binary translation). Leaving the host's MSR_TSC_AUX in hardware would leak the host's value to the guest if RDTSCP is not supported. Note, there is also a kernel bug that prevents leaking the host's value. The host kernel initializes MSR_TSC_AUX if and only if RDTSCP is supported, even though the vDSO usage consumes MSR_TSC_AUX via RDPID. I.e. if RDTSCP is not supported, there is no host value to leak. But, if/when the host kernel bug is fixed, KVM would start leaking MSR_TSC_AUX in the case where hardware supports RDPID but RDTSCP is unavailable for whatever reason. Probing MSR_TSC_AUX will also allow consolidating the probe and define logic in common x86, and will make it simpler to condition the existence of MSR_TSX_AUX (from the guest's perspective) on RDTSCP *or* RDPID. Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-7-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index ebcb5849d69b1..13e4dd128177d 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -212,7 +212,7 @@ DEFINE_PER_CPU(struct svm_cpu_data *, svm_data); * RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to * defer the restoration of TSC_AUX until the CPU returns to userspace. */ -#define TSC_AUX_URET_SLOT 0 +static int tsc_aux_uret_slot __read_mostly = -1; static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000}; @@ -959,8 +959,10 @@ static __init int svm_hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 32; } - if (boot_cpu_has(X86_FEATURE_RDTSCP)) - kvm_define_user_return_msr(TSC_AUX_URET_SLOT, MSR_TSC_AUX); + if (!kvm_probe_user_return_msr(MSR_TSC_AUX)) { + tsc_aux_uret_slot = 0; + kvm_define_user_return_msr(tsc_aux_uret_slot, MSR_TSC_AUX); + } /* Check for pause filtering support */ if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { @@ -1454,8 +1456,8 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) } } - if (static_cpu_has(X86_FEATURE_RDTSCP)) - kvm_set_user_return_msr(TSC_AUX_URET_SLOT, svm->tsc_aux, -1ull); + if (likely(tsc_aux_uret_slot >= 0)) + kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull); svm->guest_state_loaded = true; } @@ -2664,7 +2666,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data |= (u64)svm->sysenter_esp_hi << 32; break; case MSR_TSC_AUX: - if (!boot_cpu_has(X86_FEATURE_RDTSCP)) + if (tsc_aux_uret_slot < 0) return 1; if (!msr_info->host_initiated && !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) @@ -2885,7 +2887,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0; break; case MSR_TSC_AUX: - if (!boot_cpu_has(X86_FEATURE_RDTSCP)) + if (tsc_aux_uret_slot < 0) return 1; if (!msr->host_initiated && @@ -2908,7 +2910,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) * guest via direct_access_msrs, and switch it via user return. */ preempt_disable(); - r = kvm_set_user_return_msr(TSC_AUX_URET_SLOT, data, -1ull); + r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull); preempt_enable(); if (r) return 1; -- GitLab From 36fa06f9ff39f23e03cd8206dc6bbb7711c23be6 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:26 -0700 Subject: [PATCH 0098/3804] KVM: x86: Add support for RDPID without RDTSCP Allow userspace to enable RDPID for a guest without also enabling RDTSCP. Aside from checking for RDPID support in the obvious flows, VMX also needs to set ENABLE_RDTSCP=1 when RDPID is exposed. For the record, there is no known scenario where enabling RDPID without RDTSCP is desirable. But, both AMD and Intel architectures allow for the condition, i.e. this is purely to make KVM more architecturally accurate. Fixes: 41cd02c6f7f6 ("kvm: x86: Expose RDPID in KVM_GET_SUPPORTED_CPUID") Cc: stable@vger.kernel.org Reported-by: Reiji Watanabe Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-8-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 6 ++++-- arch/x86/kvm/vmx/vmx.c | 27 +++++++++++++++++++++++---- arch/x86/kvm/x86.c | 3 ++- 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 13e4dd128177d..0ba0a00f8dc6a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2669,7 +2669,8 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (tsc_aux_uret_slot < 0) return 1; if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && + !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) return 1; msr_info->data = svm->tsc_aux; break; @@ -2891,7 +2892,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) return 1; if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && + !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) return 1; /* diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 11ff9c3d95d53..b304e372aab3c 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1788,7 +1788,8 @@ static void setup_msrs(struct vcpu_vmx *vmx) if (update_transition_efer(vmx)) vmx_setup_uret_msr(vmx, MSR_EFER); - if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP)) + if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) || + guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID)) vmx_setup_uret_msr(vmx, MSR_TSC_AUX); vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL); @@ -1994,7 +1995,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_TSC_AUX: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && + !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) return 1; goto find_uret_msr; case MSR_IA32_DEBUGCTLMSR: @@ -2314,7 +2316,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_TSC_AUX: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP)) + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && + !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) return 1; /* Check reserved bit, higher 32 bits should be zero */ if ((data >> 32) != 0) @@ -4368,7 +4371,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx) xsaves_enabled, false); } - vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP); + /* + * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either + * feature is exposed to the guest. This creates a virtualization hole + * if both are supported in hardware but only one is exposed to the + * guest, but letting the guest execute RDTSCP or RDPID when either one + * is advertised is preferable to emulating the advertised instruction + * in KVM on #UD, and obviously better than incorrectly injecting #UD. + */ + if (cpu_has_vmx_rdtscp()) { + bool rdpid_or_rdtscp_enabled = + guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) || + guest_cpuid_has(vcpu, X86_FEATURE_RDPID); + + vmx_adjust_secondary_exec_control(vmx, &exec_control, + SECONDARY_EXEC_ENABLE_RDTSCP, + rdpid_or_rdtscp_enabled, false); + } vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID); vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bd90c73c37b4d..0856636efc44c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5941,7 +5941,8 @@ static void kvm_init_msr_list(void) continue; break; case MSR_TSC_AUX: - if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) + if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP) && + !kvm_cpu_cap_has(X86_FEATURE_RDPID)) continue; break; case MSR_IA32_UMWAIT_CONTROL: -- GitLab From b6194b94a2ca4affce5aab1bbf773a977ad73671 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:27 -0700 Subject: [PATCH 0099/3804] KVM: VMX: Configure list of user return MSRs at module init Configure the list of user return MSRs that are actually supported at module init instead of reprobing the list of possible MSRs every time a vCPU is created. Curating the list on a per-vCPU basis is pointless; KVM is completely hosed if the set of supported MSRs changes after module init, or if the set of MSRs differs per physical PCU. The per-vCPU lists also increase complexity (see __vmx_find_uret_msr()) and creates corner cases that _should_ be impossible, but theoretically exist in KVM, e.g. advertising RDTSCP to userspace without actually being able to virtualize RDTSCP if probing MSR_TSC_AUX fails. Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-9-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 61 ++++++++++++++++++++++++++++-------------- arch/x86/kvm/vmx/vmx.h | 10 ++++++- 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index b304e372aab3c..887db1af13125 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -461,7 +461,7 @@ static unsigned long host_idt_base; * support this emulation, IA32_STAR must always be included in * vmx_uret_msrs_list[], even in i386 builds. */ -static const u32 vmx_uret_msrs_list[] = { +static u32 vmx_uret_msrs_list[] = { #ifdef CONFIG_X86_64 MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, #endif @@ -469,6 +469,12 @@ static const u32 vmx_uret_msrs_list[] = { MSR_IA32_TSX_CTRL, }; +/* + * Number of user return MSRs that are actually supported in hardware. + * vmx_uret_msrs_list is modified when KVM is loaded to drop unsupported MSRs. + */ +static int vmx_nr_uret_msrs; + #if IS_ENABLED(CONFIG_HYPERV) static bool __read_mostly enlightened_vmcs = true; module_param(enlightened_vmcs, bool, 0444); @@ -700,9 +706,16 @@ static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr) { int i; - for (i = 0; i < vmx->nr_uret_msrs; ++i) + /* + * Note, vmx->guest_uret_msrs is the same size as vmx_uret_msrs_list, + * but is ordered differently. The MSR is matched against the list of + * supported uret MSRs using "slot", but the index that is returned is + * the index into guest_uret_msrs. + */ + for (i = 0; i < vmx_nr_uret_msrs; ++i) { if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr) return i; + } return -1; } @@ -6929,18 +6942,10 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) goto free_vpid; } - BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS); - - for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) { - u32 index = vmx_uret_msrs_list[i]; - int j = vmx->nr_uret_msrs; + for (i = 0; i < vmx_nr_uret_msrs; ++i) { + vmx->guest_uret_msrs[i].data = 0; - if (kvm_probe_user_return_msr(index)) - continue; - - vmx->guest_uret_msrs[j].slot = i; - vmx->guest_uret_msrs[j].data = 0; - switch (index) { + switch (vmx_uret_msrs_list[i]) { case MSR_IA32_TSX_CTRL: /* * TSX_CTRL_CPUID_CLEAR is handled in the CPUID @@ -6954,15 +6959,14 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) * host so that TSX remains always disabled. */ if (boot_cpu_has(X86_FEATURE_RTM)) - vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; + vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; else - vmx->guest_uret_msrs[j].mask = 0; + vmx->guest_uret_msrs[i].mask = 0; break; default: - vmx->guest_uret_msrs[j].mask = -1ull; + vmx->guest_uret_msrs[i].mask = -1ull; break; } - ++vmx->nr_uret_msrs; } err = alloc_loaded_vmcs(&vmx->vmcs01); @@ -7821,17 +7825,34 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector, }; +static __init void vmx_setup_user_return_msrs(void) +{ + u32 msr; + int i; + + BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS); + + for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) { + msr = vmx_uret_msrs_list[i]; + + if (kvm_probe_user_return_msr(msr)) + continue; + + kvm_define_user_return_msr(vmx_nr_uret_msrs, msr); + vmx_uret_msrs_list[vmx_nr_uret_msrs++] = msr; + } +} + static __init int hardware_setup(void) { unsigned long host_bndcfgs; struct desc_ptr dt; - int r, i, ept_lpage_level; + int r, ept_lpage_level; store_idt(&dt); host_idt_base = dt.address; - for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) - kvm_define_user_return_msr(i, vmx_uret_msrs_list[i]); + vmx_setup_user_return_msrs(); if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0) return -EIO; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 008cb87ff088c..d71ed8b425c52 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -245,8 +245,16 @@ struct vcpu_vmx { u32 idt_vectoring_info; ulong rflags; + /* + * User return MSRs are always emulated when enabled in the guest, but + * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside + * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to + * be loaded into hardware if those conditions aren't met. + * nr_active_uret_msrs tracks the number of MSRs that need to be loaded + * into hardware when running the guest. guest_uret_msrs[] is resorted + * whenever the number of "active" uret MSRs is modified. + */ struct vmx_uret_msr guest_uret_msrs[MAX_NR_USER_RETURN_MSRS]; - int nr_uret_msrs; int nr_active_uret_msrs; bool guest_uret_msrs_loaded; #ifdef CONFIG_X86_64 -- GitLab From ee9d22e08d1341692a43926e5e1d84c90a5dac1d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:28 -0700 Subject: [PATCH 0100/3804] KVM: VMX: Use flag to indicate "active" uret MSRs instead of sorting list Explicitly flag a uret MSR as needing to be loaded into hardware instead of resorting the list of "active" MSRs and tracking how many MSRs in total need to be loaded. The only benefit to sorting the list is that the loop to load MSRs during vmx_prepare_switch_to_guest() doesn't need to iterate over all supported uret MRS, only those that are active. But that is a pointless optimization, as the most common case, running a 64-bit guest, will load the vast majority of MSRs. Not to mention that a single WRMSR is far more expensive than iterating over the list. Providing a stable list order obviates the need to track a given MSR's "slot" in the per-CPU list of user return MSRs; all lists simply use the same ordering. Future patches will take advantage of the stable order to further simplify the related code. Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-10-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 80 ++++++++++++++++++++++-------------------- arch/x86/kvm/vmx/vmx.h | 2 +- 2 files changed, 42 insertions(+), 40 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 887db1af13125..adefedac0e3b9 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -458,8 +458,9 @@ static unsigned long host_idt_base; * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm * will emulate SYSCALL in legacy mode if the vendor string in guest * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To - * support this emulation, IA32_STAR must always be included in - * vmx_uret_msrs_list[], even in i386 builds. + * support this emulation, MSR_STAR is included in the list for i386, + * but is never loaded into hardware. MSR_CSTAR is also never loaded + * into hardware and is here purely for emulation purposes. */ static u32 vmx_uret_msrs_list[] = { #ifdef CONFIG_X86_64 @@ -702,18 +703,12 @@ static bool is_valid_passthrough_msr(u32 msr) return r; } -static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr) +static inline int __vmx_find_uret_msr(u32 msr) { int i; - /* - * Note, vmx->guest_uret_msrs is the same size as vmx_uret_msrs_list, - * but is ordered differently. The MSR is matched against the list of - * supported uret MSRs using "slot", but the index that is returned is - * the index into guest_uret_msrs. - */ for (i = 0; i < vmx_nr_uret_msrs; ++i) { - if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr) + if (vmx_uret_msrs_list[i] == msr) return i; } return -1; @@ -723,7 +718,7 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr) { int i; - i = __vmx_find_uret_msr(vmx, msr); + i = __vmx_find_uret_msr(msr); if (i >= 0) return &vmx->guest_uret_msrs[i]; return NULL; @@ -732,13 +727,14 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr) static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx, struct vmx_uret_msr *msr, u64 data) { + unsigned int slot = msr - vmx->guest_uret_msrs; int ret = 0; u64 old_msr_data = msr->data; msr->data = data; - if (msr - vmx->guest_uret_msrs < vmx->nr_active_uret_msrs) { + if (msr->load_into_hardware) { preempt_disable(); - ret = kvm_set_user_return_msr(msr->slot, msr->data, msr->mask); + ret = kvm_set_user_return_msr(slot, msr->data, msr->mask); preempt_enable(); if (ret) msr->data = old_msr_data; @@ -1090,7 +1086,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx) return false; } - i = __vmx_find_uret_msr(vmx, MSR_EFER); + i = __vmx_find_uret_msr(MSR_EFER); if (i < 0) return false; @@ -1252,11 +1248,14 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) */ if (!vmx->guest_uret_msrs_loaded) { vmx->guest_uret_msrs_loaded = true; - for (i = 0; i < vmx->nr_active_uret_msrs; ++i) - kvm_set_user_return_msr(vmx->guest_uret_msrs[i].slot, + for (i = 0; i < vmx_nr_uret_msrs; ++i) { + if (!vmx->guest_uret_msrs[i].load_into_hardware) + continue; + + kvm_set_user_return_msr(i, vmx->guest_uret_msrs[i].data, vmx->guest_uret_msrs[i].mask); - + } } if (vmx->nested.need_vmcs12_to_shadow_sync) @@ -1763,19 +1762,16 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) vmx_clear_hlt(vcpu); } -static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr) +static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr, + bool load_into_hardware) { - struct vmx_uret_msr tmp; - int from, to; + struct vmx_uret_msr *uret_msr; - from = __vmx_find_uret_msr(vmx, msr); - if (from < 0) + uret_msr = vmx_find_uret_msr(vmx, msr); + if (!uret_msr) return; - to = vmx->nr_active_uret_msrs++; - tmp = vmx->guest_uret_msrs[to]; - vmx->guest_uret_msrs[to] = vmx->guest_uret_msrs[from]; - vmx->guest_uret_msrs[from] = tmp; + uret_msr->load_into_hardware = load_into_hardware; } /* @@ -1785,30 +1781,36 @@ static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr) */ static void setup_msrs(struct vcpu_vmx *vmx) { - vmx->guest_uret_msrs_loaded = false; - vmx->nr_active_uret_msrs = 0; #ifdef CONFIG_X86_64 + bool load_syscall_msrs; + /* * The SYSCALL MSRs are only needed on long mode guests, and only * when EFER.SCE is set. */ - if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) { - vmx_setup_uret_msr(vmx, MSR_STAR); - vmx_setup_uret_msr(vmx, MSR_LSTAR); - vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK); - } + load_syscall_msrs = is_long_mode(&vmx->vcpu) && + (vmx->vcpu.arch.efer & EFER_SCE); + + vmx_setup_uret_msr(vmx, MSR_STAR, load_syscall_msrs); + vmx_setup_uret_msr(vmx, MSR_LSTAR, load_syscall_msrs); + vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK, load_syscall_msrs); #endif - if (update_transition_efer(vmx)) - vmx_setup_uret_msr(vmx, MSR_EFER); + vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx)); - if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) || - guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID)) - vmx_setup_uret_msr(vmx, MSR_TSC_AUX); + vmx_setup_uret_msr(vmx, MSR_TSC_AUX, + guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) || + guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID)); - vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL); + vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, true); if (cpu_has_vmx_msr_bitmap()) vmx_update_msr_bitmap(&vmx->vcpu); + + /* + * The set of MSRs to load may have changed, reload MSRs before the + * next VM-Enter. + */ + vmx->guest_uret_msrs_loaded = false; } static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index d71ed8b425c52..16e4e457ba23c 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -36,7 +36,7 @@ struct vmx_msrs { }; struct vmx_uret_msr { - unsigned int slot; /* The MSR's slot in kvm_user_return_msrs. */ + bool load_into_hardware; u64 data; u64 mask; }; -- GitLab From 8ea8b8d6f869425e21f34e60bdbe7e47e6c9d6b9 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:29 -0700 Subject: [PATCH 0101/3804] KVM: VMX: Use common x86's uret MSR list as the one true list Drop VMX's global list of user return MSRs now that VMX doesn't resort said list to isolate "active" MSRs, i.e. now that VMX's list and x86's list have the same MSRs in the same order. In addition to eliminating the redundant list, this will also allow moving more of the list management into common x86. Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-11-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx/vmx.c | 97 ++++++++++++++------------------- arch/x86/kvm/x86.c | 12 ++++ 3 files changed, 53 insertions(+), 57 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e8dcbc632cf8e..6b15f27f49d01 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1777,6 +1777,7 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long icr, int op_64_bit); void kvm_define_user_return_msr(unsigned index, u32 msr); +int kvm_find_user_return_msr(u32 msr); int kvm_probe_user_return_msr(u32 msr); int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index adefedac0e3b9..39506704be966 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -454,26 +454,7 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) static unsigned long host_idt_base; -/* - * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm - * will emulate SYSCALL in legacy mode if the vendor string in guest - * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To - * support this emulation, MSR_STAR is included in the list for i386, - * but is never loaded into hardware. MSR_CSTAR is also never loaded - * into hardware and is here purely for emulation purposes. - */ -static u32 vmx_uret_msrs_list[] = { -#ifdef CONFIG_X86_64 - MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, -#endif - MSR_EFER, MSR_TSC_AUX, MSR_STAR, - MSR_IA32_TSX_CTRL, -}; - -/* - * Number of user return MSRs that are actually supported in hardware. - * vmx_uret_msrs_list is modified when KVM is loaded to drop unsupported MSRs. - */ +/* Number of user return MSRs that are actually supported in hardware. */ static int vmx_nr_uret_msrs; #if IS_ENABLED(CONFIG_HYPERV) @@ -703,22 +684,11 @@ static bool is_valid_passthrough_msr(u32 msr) return r; } -static inline int __vmx_find_uret_msr(u32 msr) -{ - int i; - - for (i = 0; i < vmx_nr_uret_msrs; ++i) { - if (vmx_uret_msrs_list[i] == msr) - return i; - } - return -1; -} - struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr) { int i; - i = __vmx_find_uret_msr(msr); + i = kvm_find_user_return_msr(msr); if (i >= 0) return &vmx->guest_uret_msrs[i]; return NULL; @@ -1086,7 +1056,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx) return false; } - i = __vmx_find_uret_msr(MSR_EFER); + i = kvm_find_user_return_msr(MSR_EFER); if (i < 0) return false; @@ -6922,6 +6892,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) static int vmx_create_vcpu(struct kvm_vcpu *vcpu) { + struct vmx_uret_msr *tsx_ctrl; struct vcpu_vmx *vmx; int i, cpu, err; @@ -6946,29 +6917,25 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) for (i = 0; i < vmx_nr_uret_msrs; ++i) { vmx->guest_uret_msrs[i].data = 0; - - switch (vmx_uret_msrs_list[i]) { - case MSR_IA32_TSX_CTRL: - /* - * TSX_CTRL_CPUID_CLEAR is handled in the CPUID - * interception. Keep the host value unchanged to avoid - * changing CPUID bits under the host kernel's feet. - * - * hle=0, rtm=0, tsx_ctrl=1 can be found with some - * combinations of new kernel and old userspace. If - * those guests run on a tsx=off host, do allow guests - * to use TSX_CTRL, but do not change the value on the - * host so that TSX remains always disabled. - */ - if (boot_cpu_has(X86_FEATURE_RTM)) - vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; - else - vmx->guest_uret_msrs[i].mask = 0; - break; - default: - vmx->guest_uret_msrs[i].mask = -1ull; - break; - } + vmx->guest_uret_msrs[i].mask = -1ull; + } + tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL); + if (tsx_ctrl) { + /* + * TSX_CTRL_CPUID_CLEAR is handled in the CPUID interception. + * Keep the host value unchanged to avoid changing CPUID bits + * under the host kernel's feet. + * + * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations + * of new kernel and old userspace. If those guests run on a + * tsx=off host, do allow guests to use TSX_CTRL, but do not + * change the value on the host so that TSX remains always + * disabled. + */ + if (boot_cpu_has(X86_FEATURE_RTM)) + vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; + else + vmx->guest_uret_msrs[i].mask = 0; } err = alloc_loaded_vmcs(&vmx->vmcs01); @@ -7829,6 +7796,22 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { static __init void vmx_setup_user_return_msrs(void) { + + /* + * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm + * will emulate SYSCALL in legacy mode if the vendor string in guest + * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To + * support this emulation, MSR_STAR is included in the list for i386, + * but is never loaded into hardware. MSR_CSTAR is also never loaded + * into hardware and is here purely for emulation purposes. + */ + const u32 vmx_uret_msrs_list[] = { + #ifdef CONFIG_X86_64 + MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, + #endif + MSR_EFER, MSR_TSC_AUX, MSR_STAR, + MSR_IA32_TSX_CTRL, + }; u32 msr; int i; @@ -7841,7 +7824,7 @@ static __init void vmx_setup_user_return_msrs(void) continue; kvm_define_user_return_msr(vmx_nr_uret_msrs, msr); - vmx_uret_msrs_list[vmx_nr_uret_msrs++] = msr; + vmx_nr_uret_msrs++; } } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0856636efc44c..d514031ed25f8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -364,6 +364,18 @@ void kvm_define_user_return_msr(unsigned slot, u32 msr) } EXPORT_SYMBOL_GPL(kvm_define_user_return_msr); +int kvm_find_user_return_msr(u32 msr) +{ + int i; + + for (i = 0; i < user_return_msrs_global.nr; ++i) { + if (user_return_msrs_global.msrs[i] == msr) + return i; + } + return -1; +} +EXPORT_SYMBOL_GPL(kvm_find_user_return_msr); + static void kvm_user_return_msr_cpu_online(void) { unsigned int cpu = smp_processor_id(); -- GitLab From 5e17c624010a82bbcca9b955155781927eb6532a Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:30 -0700 Subject: [PATCH 0102/3804] KVM: VMX: Disable loading of TSX_CTRL MSR the more conventional way Tag TSX_CTRL as not needing to be loaded when RTM isn't supported in the host. Crushing the write mask to '0' has the same effect, but requires more mental gymnastics to understand. Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-12-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/vmx.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 39506704be966..bd2187b4cc530 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1771,7 +1771,13 @@ static void setup_msrs(struct vcpu_vmx *vmx) guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) || guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID)); - vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, true); + /* + * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations of new + * kernel and old userspace. If those guests run on a tsx=off host, do + * allow guests to use TSX_CTRL, but don't change the value in hardware + * so that TSX remains always disabled. + */ + vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM)); if (cpu_has_vmx_msr_bitmap()) vmx_update_msr_bitmap(&vmx->vcpu); @@ -6919,23 +6925,15 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) vmx->guest_uret_msrs[i].data = 0; vmx->guest_uret_msrs[i].mask = -1ull; } - tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL); - if (tsx_ctrl) { + if (boot_cpu_has(X86_FEATURE_RTM)) { /* * TSX_CTRL_CPUID_CLEAR is handled in the CPUID interception. * Keep the host value unchanged to avoid changing CPUID bits * under the host kernel's feet. - * - * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations - * of new kernel and old userspace. If those guests run on a - * tsx=off host, do allow guests to use TSX_CTRL, but do not - * change the value on the host so that TSX remains always - * disabled. */ - if (boot_cpu_has(X86_FEATURE_RTM)) + tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL); + if (tsx_ctrl) vmx->guest_uret_msrs[i].mask = ~(u64)TSX_CTRL_CPUID_CLEAR; - else - vmx->guest_uret_msrs[i].mask = 0; } err = alloc_loaded_vmcs(&vmx->vmcs01); -- GitLab From 9cc39a5a43c05f8eda206bf9e144119820ecf5c8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:31 -0700 Subject: [PATCH 0103/3804] KVM: x86: Export the number of uret MSRs to vendor modules Split out and export the number of configured user return MSRs so that VMX can iterate over the set of MSRs without having to do its own tracking. Keep the list itself internal to x86 so that vendor code still has to go through the "official" APIs to add/modify entries. Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-13-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 29 +++++++++++++---------------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 6b15f27f49d01..22505e74c3dac 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1418,6 +1418,7 @@ struct kvm_arch_async_pf { bool direct_map; }; +extern u32 __read_mostly kvm_nr_uret_msrs; extern u64 __read_mostly host_efer; extern bool __read_mostly allow_smaller_maxphyaddr; extern struct kvm_x86_ops kvm_x86_ops; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d514031ed25f8..5e1deed8ea5d2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -184,11 +184,6 @@ module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR); */ #define KVM_MAX_NR_USER_RETURN_MSRS 16 -struct kvm_user_return_msrs_global { - int nr; - u32 msrs[KVM_MAX_NR_USER_RETURN_MSRS]; -}; - struct kvm_user_return_msrs { struct user_return_notifier urn; bool registered; @@ -198,7 +193,9 @@ struct kvm_user_return_msrs { } values[KVM_MAX_NR_USER_RETURN_MSRS]; }; -static struct kvm_user_return_msrs_global __read_mostly user_return_msrs_global; +u32 __read_mostly kvm_nr_uret_msrs; +EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs); +static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS]; static struct kvm_user_return_msrs __percpu *user_return_msrs; #define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \ @@ -330,10 +327,10 @@ static void kvm_on_user_return(struct user_return_notifier *urn) user_return_notifier_unregister(urn); } local_irq_restore(flags); - for (slot = 0; slot < user_return_msrs_global.nr; ++slot) { + for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) { values = &msrs->values[slot]; if (values->host != values->curr) { - wrmsrl(user_return_msrs_global.msrs[slot], values->host); + wrmsrl(kvm_uret_msrs_list[slot], values->host); values->curr = values->host; } } @@ -358,9 +355,9 @@ EXPORT_SYMBOL_GPL(kvm_probe_user_return_msr); void kvm_define_user_return_msr(unsigned slot, u32 msr) { BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS); - user_return_msrs_global.msrs[slot] = msr; - if (slot >= user_return_msrs_global.nr) - user_return_msrs_global.nr = slot + 1; + kvm_uret_msrs_list[slot] = msr; + if (slot >= kvm_nr_uret_msrs) + kvm_nr_uret_msrs = slot + 1; } EXPORT_SYMBOL_GPL(kvm_define_user_return_msr); @@ -368,8 +365,8 @@ int kvm_find_user_return_msr(u32 msr) { int i; - for (i = 0; i < user_return_msrs_global.nr; ++i) { - if (user_return_msrs_global.msrs[i] == msr) + for (i = 0; i < kvm_nr_uret_msrs; ++i) { + if (kvm_uret_msrs_list[i] == msr) return i; } return -1; @@ -383,8 +380,8 @@ static void kvm_user_return_msr_cpu_online(void) u64 value; int i; - for (i = 0; i < user_return_msrs_global.nr; ++i) { - rdmsrl_safe(user_return_msrs_global.msrs[i], &value); + for (i = 0; i < kvm_nr_uret_msrs; ++i) { + rdmsrl_safe(kvm_uret_msrs_list[i], &value); msrs->values[i].host = value; msrs->values[i].curr = value; } @@ -399,7 +396,7 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask) value = (value & mask) | (msrs->values[slot].host & ~mask); if (value == msrs->values[slot].curr) return 0; - err = wrmsrl_safe(user_return_msrs_global.msrs[slot], value); + err = wrmsrl_safe(kvm_uret_msrs_list[slot], value); if (err) return 1; -- GitLab From e5fda4bbadb053e3b5164476146cf43092785c0b Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:32 -0700 Subject: [PATCH 0104/3804] KVM: x86: Move uret MSR slot management to common x86 Now that SVM and VMX both probe MSRs before "defining" user return slots for them, consolidate the code for probe+define into common x86 and eliminate the odd behavior of having the vendor code define the slot for a given MSR. Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-14-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +-- arch/x86/kvm/svm/svm.c | 5 +---- arch/x86/kvm/vmx/vmx.c | 19 ++++--------------- arch/x86/kvm/x86.c | 19 +++++++++++-------- 4 files changed, 17 insertions(+), 29 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 22505e74c3dac..8155eaac22a7e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1777,9 +1777,8 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long ipi_bitmap_high, u32 min, unsigned long icr, int op_64_bit); -void kvm_define_user_return_msr(unsigned index, u32 msr); +int kvm_add_user_return_msr(u32 msr); int kvm_find_user_return_msr(u32 msr); -int kvm_probe_user_return_msr(u32 msr); int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0ba0a00f8dc6a..d13b72bfb736b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -959,10 +959,7 @@ static __init int svm_hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 32; } - if (!kvm_probe_user_return_msr(MSR_TSC_AUX)) { - tsc_aux_uret_slot = 0; - kvm_define_user_return_msr(tsc_aux_uret_slot, MSR_TSC_AUX); - } + tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX); /* Check for pause filtering support */ if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index bd2187b4cc530..042823b492730 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -454,9 +454,6 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) static unsigned long host_idt_base; -/* Number of user return MSRs that are actually supported in hardware. */ -static int vmx_nr_uret_msrs; - #if IS_ENABLED(CONFIG_HYPERV) static bool __read_mostly enlightened_vmcs = true; module_param(enlightened_vmcs, bool, 0444); @@ -1218,7 +1215,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) */ if (!vmx->guest_uret_msrs_loaded) { vmx->guest_uret_msrs_loaded = true; - for (i = 0; i < vmx_nr_uret_msrs; ++i) { + for (i = 0; i < kvm_nr_uret_msrs; ++i) { if (!vmx->guest_uret_msrs[i].load_into_hardware) continue; @@ -6921,7 +6918,7 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) goto free_vpid; } - for (i = 0; i < vmx_nr_uret_msrs; ++i) { + for (i = 0; i < kvm_nr_uret_msrs; ++i) { vmx->guest_uret_msrs[i].data = 0; vmx->guest_uret_msrs[i].mask = -1ull; } @@ -7810,20 +7807,12 @@ static __init void vmx_setup_user_return_msrs(void) MSR_EFER, MSR_TSC_AUX, MSR_STAR, MSR_IA32_TSX_CTRL, }; - u32 msr; int i; BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS); - for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) { - msr = vmx_uret_msrs_list[i]; - - if (kvm_probe_user_return_msr(msr)) - continue; - - kvm_define_user_return_msr(vmx_nr_uret_msrs, msr); - vmx_nr_uret_msrs++; - } + for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) + kvm_add_user_return_msr(vmx_uret_msrs_list[i]); } static __init int hardware_setup(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5e1deed8ea5d2..0a0eebf35dd53 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -336,7 +336,7 @@ static void kvm_on_user_return(struct user_return_notifier *urn) } } -int kvm_probe_user_return_msr(u32 msr) +static int kvm_probe_user_return_msr(u32 msr) { u64 val; int ret; @@ -350,16 +350,18 @@ out: preempt_enable(); return ret; } -EXPORT_SYMBOL_GPL(kvm_probe_user_return_msr); -void kvm_define_user_return_msr(unsigned slot, u32 msr) +int kvm_add_user_return_msr(u32 msr) { - BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS); - kvm_uret_msrs_list[slot] = msr; - if (slot >= kvm_nr_uret_msrs) - kvm_nr_uret_msrs = slot + 1; + BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS); + + if (kvm_probe_user_return_msr(msr)) + return -1; + + kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr; + return kvm_nr_uret_msrs++; } -EXPORT_SYMBOL_GPL(kvm_define_user_return_msr); +EXPORT_SYMBOL_GPL(kvm_add_user_return_msr); int kvm_find_user_return_msr(u32 msr) { @@ -8132,6 +8134,7 @@ int kvm_arch_init(void *opaque) printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n"); goto out_free_x86_emulator_cache; } + kvm_nr_uret_msrs = 0; r = kvm_mmu_module_init(); if (r) -- GitLab From 61a05d444d2ca8d40add453a5f7058fbb1b57eca Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:33 -0700 Subject: [PATCH 0105/3804] KVM: x86: Tie Intel and AMD behavior for MSR_TSC_AUX to guest CPU model Squish the Intel and AMD emulation of MSR_TSC_AUX together and tie it to the guest CPU model instead of the host CPU behavior. While not strictly necessary to avoid guest breakage, emulating cross-vendor "architecture" will provide consistent behavior for the guest, e.g. WRMSR fault behavior won't change if the vCPU is migrated to a host with divergent behavior. Note, the "new" kvm_is_supported_user_return_msr() checks do not add new functionality on either SVM or VMX. On SVM, the equivalent was "tsc_aux_uret_slot < 0", and on VMX the check was buried in the vmx_find_uret_msr() call at the find_uret_msr label. Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-15-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 5 +++++ arch/x86/kvm/svm/svm.c | 24 ---------------------- arch/x86/kvm/vmx/vmx.c | 15 -------------- arch/x86/kvm/x86.c | 36 +++++++++++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 39 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8155eaac22a7e..f85480b1d215f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1781,6 +1781,11 @@ int kvm_add_user_return_msr(u32 msr); int kvm_find_user_return_msr(u32 msr); int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); +static inline bool kvm_is_supported_user_return_msr(u32 msr) +{ + return kvm_find_user_return_msr(msr) >= 0; +} + u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc); u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d13b72bfb736b..0922d8e173e61 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2663,12 +2663,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data |= (u64)svm->sysenter_esp_hi << 32; break; case MSR_TSC_AUX: - if (tsc_aux_uret_slot < 0) - return 1; - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && - !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) - return 1; msr_info->data = svm->tsc_aux; break; /* @@ -2885,24 +2879,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0; break; case MSR_TSC_AUX: - if (tsc_aux_uret_slot < 0) - return 1; - - if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && - !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) - return 1; - - /* - * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has - * incomplete and conflicting architectural behavior. Current - * AMD CPUs completely ignore bits 63:32, i.e. they aren't - * reserved and always read as zeros. Emulate AMD CPU behavior - * to avoid explosions if the vCPU is migrated from an AMD host - * to an Intel host. - */ - data = (u32)data; - /* * TSC_AUX is usually changed only during boot and never read * directly. Intercept TSC_AUX instead of exposing it to the diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 042823b492730..61f7e5221bf3a 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1981,12 +1981,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) else msr_info->data = vmx->pt_desc.guest.addr_a[index / 2]; break; - case MSR_TSC_AUX: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && - !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) - return 1; - goto find_uret_msr; case MSR_IA32_DEBUGCTLMSR: msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL); break; @@ -2302,15 +2296,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) else vmx->pt_desc.guest.addr_a[index / 2] = data; break; - case MSR_TSC_AUX: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && - !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) - return 1; - /* Check reserved bit, higher 32 bits should be zero */ - if ((data >> 32) != 0) - return 1; - goto find_uret_msr; case MSR_IA32_PERF_CAPABILITIES: if (data && !vcpu_to_pmu(vcpu)->version) return 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0a0eebf35dd53..4efd2978ec089 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1642,6 +1642,30 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, * invokes 64-bit SYSENTER. */ data = get_canonical(data, vcpu_virt_addr_bits(vcpu)); + break; + case MSR_TSC_AUX: + if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX)) + return 1; + + if (!host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && + !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) + return 1; + + /* + * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has + * incomplete and conflicting architectural behavior. Current + * AMD CPUs completely ignore bits 63:32, i.e. they aren't + * reserved and always read as zeros. Enforce Intel's reserved + * bits check if and only if the guest CPU is Intel, and clear + * the bits in all other cases. This ensures cross-vendor + * migration will provide consistent behavior for the guest. + */ + if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0) + return 1; + + data = (u32)data; + break; } msr.data = data; @@ -1678,6 +1702,18 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ)) return KVM_MSR_RET_FILTERED; + switch (index) { + case MSR_TSC_AUX: + if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX)) + return 1; + + if (!host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) && + !guest_cpuid_has(vcpu, X86_FEATURE_RDPID)) + return 1; + break; + } + msr.index = index; msr.host_initiated = host_initiated; -- GitLab From 78bba966ee3cdbbfc585d8e39237378fba50a142 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 4 May 2021 10:17:34 -0700 Subject: [PATCH 0106/3804] KVM: x86: Hide RDTSCP and RDPID if MSR_TSC_AUX probing failed If probing MSR_TSC_AUX failed, hide RDTSCP and RDPID, and WARN if either feature was reported as supported. In theory, such a scenario should never happen as both Intel and AMD state that MSR_TSC_AUX is available if RDTSCP or RDPID is supported. But, KVM injects #GP on MSR_TSC_AUX accesses if probing failed, faults on WRMSR(MSR_TSC_AUX) may be fatal to the guest (because they happen during early CPU bringup), and KVM itself has effectively misreported RDPID support in the past. Note, this also has the happy side effect of omitting MSR_TSC_AUX from the list of MSRs that are exposed to userspace if probing the MSR fails. Signed-off-by: Sean Christopherson Message-Id: <20210504171734.1434054-16-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c0e8c5e921898..f42e9491a6c8c 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -567,6 +567,21 @@ void kvm_set_cpu_caps(void) F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) | F(PMM) | F(PMM_EN) ); + + /* + * Hide RDTSCP and RDPID if either feature is reported as supported but + * probing MSR_TSC_AUX failed. This is purely a sanity check and + * should never happen, but the guest will likely crash if RDTSCP or + * RDPID is misreported, and KVM has botched MSR_TSC_AUX emulation in + * the past. For example, the sanity check may fire if this instance of + * KVM is running as L1 on top of an older, broken KVM. + */ + if (WARN_ON((kvm_cpu_cap_has(X86_FEATURE_RDTSCP) || + kvm_cpu_cap_has(X86_FEATURE_RDPID)) && + !kvm_is_supported_user_return_msr(MSR_TSC_AUX))) { + kvm_cpu_cap_clear(X86_FEATURE_RDTSCP); + kvm_cpu_cap_clear(X86_FEATURE_RDPID); + } } EXPORT_SYMBOL_GPL(kvm_set_cpu_caps); -- GitLab From 34114136f725cbd0c83e7b5a0c8a977976cd82f7 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 5 May 2021 22:15:09 +1000 Subject: [PATCH 0107/3804] KVM: PPC: Book3S HV: Fix conversion to gfn-based MMU notifier callbacks Commit b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier callbacks") causes unmap_gfn_range and age_gfn callbacks to only work on the first gfn in the range. It also makes the aging callbacks call into both radix and hash aging functions for radix guests. Fix this. Add warnings for the single-gfn calls that have been converted to range callbacks, in case they ever receieve ranges greater than 1. Fixes: b1c5356e873c ("KVM: PPC: Convert to the gfn-based MMU notifier callbacks") Reported-by: Bharata B Rao Tested-by: Bharata B Rao Signed-off-by: Nicholas Piggin Message-Id: <20210505121509.1470207-1-npiggin@gmail.com> Signed-off-by: Paolo Bonzini --- arch/powerpc/include/asm/kvm_book3s.h | 2 +- arch/powerpc/kvm/book3s_64_mmu_hv.c | 46 ++++++++++++++++++-------- arch/powerpc/kvm/book3s_64_mmu_radix.c | 5 ++- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index a6e9a5585e618..e6b53c6e21e32 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -210,7 +210,7 @@ extern void kvmppc_free_pgtable_radix(struct kvm *kvm, pgd_t *pgd, unsigned int lpid); extern int kvmppc_radix_init(void); extern void kvmppc_radix_exit(void); -extern bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, +extern void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn); extern bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn); diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index b7bd9ca040b85..2d9193cd73be4 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -795,7 +795,7 @@ static void kvmppc_unmap_hpte(struct kvm *kvm, unsigned long i, } } -static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, +static void kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) { unsigned long i; @@ -829,15 +829,21 @@ static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, unlock_rmap(rmapp); __unlock_hpte(hptep, be64_to_cpu(hptep[0])); } - return false; } bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range) { - if (kvm_is_radix(kvm)) - return kvm_unmap_radix(kvm, range->slot, range->start); + gfn_t gfn; + + if (kvm_is_radix(kvm)) { + for (gfn = range->start; gfn < range->end; gfn++) + kvm_unmap_radix(kvm, range->slot, gfn); + } else { + for (gfn = range->start; gfn < range->end; gfn++) + kvm_unmap_rmapp(kvm, range->slot, range->start); + } - return kvm_unmap_rmapp(kvm, range->slot, range->start); + return false; } void kvmppc_core_flush_memslot_hv(struct kvm *kvm, @@ -924,10 +930,18 @@ static bool kvm_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) { - if (kvm_is_radix(kvm)) - kvm_age_radix(kvm, range->slot, range->start); + gfn_t gfn; + bool ret = false; - return kvm_age_rmapp(kvm, range->slot, range->start); + if (kvm_is_radix(kvm)) { + for (gfn = range->start; gfn < range->end; gfn++) + ret |= kvm_age_radix(kvm, range->slot, gfn); + } else { + for (gfn = range->start; gfn < range->end; gfn++) + ret |= kvm_age_rmapp(kvm, range->slot, gfn); + } + + return ret; } static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, @@ -965,18 +979,24 @@ static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot, bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) { - if (kvm_is_radix(kvm)) - kvm_test_age_radix(kvm, range->slot, range->start); + WARN_ON(range->start + 1 != range->end); - return kvm_test_age_rmapp(kvm, range->slot, range->start); + if (kvm_is_radix(kvm)) + return kvm_test_age_radix(kvm, range->slot, range->start); + else + return kvm_test_age_rmapp(kvm, range->slot, range->start); } bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range) { + WARN_ON(range->start + 1 != range->end); + if (kvm_is_radix(kvm)) - return kvm_unmap_radix(kvm, range->slot, range->start); + kvm_unmap_radix(kvm, range->slot, range->start); + else + kvm_unmap_rmapp(kvm, range->slot, range->start); - return kvm_unmap_rmapp(kvm, range->slot, range->start); + return false; } static int vcpus_running(struct kvm *kvm) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index ec4f58fa9f5a2..d909c069363e0 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -993,7 +993,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_vcpu *vcpu, } /* Called with kvm->mmu_lock held */ -bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, +void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) { pte_t *ptep; @@ -1002,14 +1002,13 @@ bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) { uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT); - return false; + return; } ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep)) kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, kvm->arch.lpid); - return false; } /* Called with kvm->mmu_lock held */ -- GitLab From e8ea85fb280ec55674bca88ea7cd85f60d19567f Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Tue, 2 Feb 2021 17:04:32 +0800 Subject: [PATCH 0108/3804] KVM: X86: Add support for the emulation of DR6_BUS_LOCK bit Bus lock debug exception introduces a new bit DR6_BUS_LOCK (bit 11 of DR6) to indicate that bus lock #DB exception is generated. The set/clear of DR6_BUS_LOCK is similar to the DR6_RTM. The processor clears DR6_BUS_LOCK when the exception is generated. For all other #DB, the processor sets this bit to 1. Software #DB handler should set this bit before returning to the interrupted task. In VMM, to avoid breaking the CPUs without bus lock #DB exception support, activate the DR6_BUS_LOCK conditionally in DR6_FIXED_1 bits. When intercepting the #DB exception caused by bus locks, bit 11 of the exit qualification is set to identify it. The VMM should emulate the exception by clearing the bit 11 of the guest DR6. Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Signed-off-by: Chenyi Qiang Message-Id: <20210202090433.13441-3-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/x86.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f85480b1d215f..8836b30962178 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -200,6 +200,7 @@ enum x86_intercept_stage; #define KVM_NR_DB_REGS 4 +#define DR6_BUS_LOCK (1 << 11) #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) #define DR6_BT (1 << 15) @@ -213,7 +214,7 @@ enum x86_intercept_stage; * DR6_ACTIVE_LOW is also used as the init/reset value for DR6. */ #define DR6_ACTIVE_LOW 0xffff0ff0 -#define DR6_VOLATILE 0x0001e00f +#define DR6_VOLATILE 0x0001e80f #define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE) #define DR7_BP_EN_MASK 0x000000ff diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4efd2978ec089..9b1a7040eae31 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1176,6 +1176,9 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM)) fixed |= DR6_RTM; + + if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) + fixed |= DR6_BUS_LOCK; return fixed; } -- GitLab From 76ea438b4afcd9ee8da3387e9af4625eaccff58f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 6 May 2021 06:30:04 -0400 Subject: [PATCH 0109/3804] KVM: X86: Expose bus lock debug exception to guest Bus lock debug exception is an ability to notify the kernel by an #DB trap after the instruction acquires a bus lock and is executed when CPL>0. This allows the kernel to enforce user application throttling or mitigations. Existence of bus lock debug exception is enumerated via CPUID.(EAX=7,ECX=0).ECX[24]. Software can enable these exceptions by setting bit 2 of the MSR_IA32_DEBUGCTL. Expose the CPUID to guest and emulate the MSR handling when guest enables it. Support for this feature was originally developed by Xiaoyao Li and Chenyi Qiang, but code has since changed enough that this patch has nothing in common with theirs, except for this commit message. Co-developed-by: Xiaoyao Li Signed-off-by: Xiaoyao Li Signed-off-by: Chenyi Qiang Message-Id: <20210202090433.13441-4-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/cpuid.c | 2 +- arch/x86/kvm/vmx/capabilities.h | 3 +++ arch/x86/kvm/vmx/vmx.c | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index f42e9491a6c8c..9a48f138832d4 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -458,7 +458,7 @@ void kvm_set_cpu_caps(void) F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) | F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) | F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ | - F(SGX_LC) + F(SGX_LC) | F(BUS_LOCK_DETECT) ); /* Set LA57 based on hardware capability. */ if (cpuid_ecx(7) & F(LA57)) diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index d1d77985e889f..8dee8a5fbc17f 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -398,6 +398,9 @@ static inline u64 vmx_supported_debugctl(void) { u64 debugctl = 0; + if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) + debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; + if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) debugctl |= DEBUGCTLMSR_LBR_MASK; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 61f7e5221bf3a..f2fd447eed459 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -2014,6 +2014,9 @@ static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu) if (!intel_pmu_lbr_is_enabled(vcpu)) debugctl &= ~DEBUGCTLMSR_LBR_MASK; + if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)) + debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT; + return debugctl; } -- GitLab From 03ca4589fabcc66b27e4cb8f8e95d64cf43badd0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 5 May 2021 13:42:21 -0700 Subject: [PATCH 0110/3804] KVM: x86: Prevent KVM SVM from loading on kernels with 5-level paging Disallow loading KVM SVM if 5-level paging is supported. In theory, NPT for L1 should simply work, but there unknowns with respect to how the guest's MAXPHYADDR will be handled by hardware. Nested NPT is more problematic, as running an L1 VMM that is using 2-level page tables requires stacking single-entry PDP and PML4 tables in KVM's NPT for L2, as there are no equivalent entries in L1's NPT to shadow. Barring hardware magic, for 5-level paging, KVM would need stack another layer to handle PML5. Opportunistically rename the lm_root pointer, which is used for the aforementioned stacking when shadowing 2-level L1 NPT, to pml4_root to call out that it's specifically for PML4. Suggested-by: Paolo Bonzini Signed-off-by: Sean Christopherson Message-Id: <20210505204221.1934471-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu/mmu.c | 20 ++++++++++---------- arch/x86/kvm/svm/svm.c | 5 +++++ 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8836b30962178..55efbacfc2445 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -409,7 +409,7 @@ struct kvm_mmu { u32 pkru_mask; u64 *pae_root; - u64 *lm_root; + u64 *pml4_root; /* * check zero bits on shadow page table entries, these diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 4b3ee244ebe05..0144c40d09c76 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3310,12 +3310,12 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) { pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; - if (WARN_ON_ONCE(!mmu->lm_root)) { + if (WARN_ON_ONCE(!mmu->pml4_root)) { r = -EIO; goto out_unlock; } - mmu->lm_root[0] = __pa(mmu->pae_root) | pm_mask; + mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask; } for (i = 0; i < 4; ++i) { @@ -3335,7 +3335,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) } if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) - mmu->root_hpa = __pa(mmu->lm_root); + mmu->root_hpa = __pa(mmu->pml4_root); else mmu->root_hpa = __pa(mmu->pae_root); @@ -3350,7 +3350,7 @@ out_unlock: static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; - u64 *lm_root, *pae_root; + u64 *pml4_root, *pae_root; /* * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP @@ -3369,14 +3369,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) if (WARN_ON_ONCE(mmu->shadow_root_level != PT64_ROOT_4LEVEL)) return -EIO; - if (mmu->pae_root && mmu->lm_root) + if (mmu->pae_root && mmu->pml4_root) return 0; /* * The special roots should always be allocated in concert. Yell and * bail if KVM ends up in a state where only one of the roots is valid. */ - if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->lm_root)) + if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root)) return -EIO; /* @@ -3387,14 +3387,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu) if (!pae_root) return -ENOMEM; - lm_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); - if (!lm_root) { + pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); + if (!pml4_root) { free_page((unsigned long)pae_root); return -ENOMEM; } mmu->pae_root = pae_root; - mmu->lm_root = lm_root; + mmu->pml4_root = pml4_root; return 0; } @@ -5261,7 +5261,7 @@ static void free_mmu_pages(struct kvm_mmu *mmu) if (!tdp_enabled && mmu->pae_root) set_memory_encrypted((unsigned long)mmu->pae_root, 1); free_page((unsigned long)mmu->pae_root); - free_page((unsigned long)mmu->lm_root); + free_page((unsigned long)mmu->pml4_root); } static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0922d8e173e61..8124f51e9488f 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -447,6 +447,11 @@ static int has_svm(void) return 0; } + if (pgtable_l5_enabled()) { + pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n"); + return 0; + } + return 1; } -- GitLab From 594b27e677b35f9734b1969d175ebc6146741109 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 5 May 2021 23:48:17 +0200 Subject: [PATCH 0111/3804] KVM: x86: Cancel pvclock_gtod_work on module removal Nothing prevents the following: pvclock_gtod_notify() queue_work(system_long_wq, &pvclock_gtod_work); ... remove_module(kvm); ... work_queue_run() pvclock_gtod_work() <- UAF Ditto for any other operation on that workqueue list head which touches pvclock_gtod_work after module removal. Cancel the work in kvm_arch_exit() to prevent that. Fixes: 16e8d74d2da9 ("KVM: x86: notifier for clocksource changes") Signed-off-by: Thomas Gleixner Message-Id: <87czu4onry.ffs@nanos.tec.linutronix.de> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9b1a7040eae31..259139a145cbb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8224,6 +8224,7 @@ void kvm_arch_exit(void) cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE); #ifdef CONFIG_X86_64 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier); + cancel_work_sync(&pvclock_gtod_work); #endif kvm_x86_ops.hardware_enable = NULL; kvm_mmu_module_exit(); -- GitLab From 3f804f6d201ca93adf4c3df04d1bfd152c1129d6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 6 May 2021 15:21:37 +0200 Subject: [PATCH 0112/3804] KVM: x86: Prevent deadlock against tk_core.seq syzbot reported a possible deadlock in pvclock_gtod_notify(): CPU 0 CPU 1 write_seqcount_begin(&tk_core.seq); pvclock_gtod_notify() spin_lock(&pool->lock); queue_work(..., &pvclock_gtod_work) ktime_get() spin_lock(&pool->lock); do { seq = read_seqcount_begin(tk_core.seq) ... } while (read_seqcount_retry(&tk_core.seq, seq); While this is unlikely to happen, it's possible. Delegate queue_work() to irq_work() which postpones it until the tk_core.seq write held region is left and interrupts are reenabled. Fixes: 16e8d74d2da9 ("KVM: x86: notifier for clocksource changes") Reported-by: syzbot+6beae4000559d41d80f8@syzkaller.appspotmail.com Signed-off-by: Thomas Gleixner Message-Id: <87h7jgm1zy.ffs@nanos.tec.linutronix.de> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 259139a145cbb..5bd550eaf6833 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8094,6 +8094,18 @@ static void pvclock_gtod_update_fn(struct work_struct *work) static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); +/* + * Indirection to move queue_work() out of the tk_core.seq write held + * region to prevent possible deadlocks against time accessors which + * are invoked with work related locks held. + */ +static void pvclock_irq_work_fn(struct irq_work *w) +{ + queue_work(system_long_wq, &pvclock_gtod_work); +} + +static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn); + /* * Notification about pvclock gtod data update. */ @@ -8105,13 +8117,14 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused, update_pvclock_gtod(tk); - /* disable master clock if host does not trust, or does not - * use, TSC based clocksource. + /* + * Disable master clock if host does not trust, or does not use, + * TSC based clocksource. Delegate queue_work() to irq_work as + * this is invoked with tk_core.seq write held. */ if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) && atomic_read(&kvm_guest_has_master_clock) != 0) - queue_work(system_long_wq, &pvclock_gtod_work); - + irq_work_queue(&pvclock_irq_work); return 0; } @@ -8224,6 +8237,7 @@ void kvm_arch_exit(void) cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE); #ifdef CONFIG_X86_64 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier); + irq_work_sync(&pvclock_irq_work); cancel_work_sync(&pvclock_gtod_work); #endif kvm_x86_ops.hardware_enable = NULL; -- GitLab From b26990987ffce0525abbd84b36595869cfdbbfe6 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Thu, 6 May 2021 16:03:52 +0200 Subject: [PATCH 0113/3804] tools/kvm_stat: Fix documentation typo Makes the dash in front of option '-z' disappear in the generated man-page. Signed-off-by: Stefan Raspl Message-Id: <20210506140352.4178789-1-raspl@linux.ibm.com> Signed-off-by: Paolo Bonzini --- tools/kvm/kvm_stat/kvm_stat.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt index feaf46451e838..3a9f2037bd23f 100644 --- a/tools/kvm/kvm_stat/kvm_stat.txt +++ b/tools/kvm/kvm_stat/kvm_stat.txt @@ -111,7 +111,7 @@ OPTIONS --tracepoints:: retrieve statistics from tracepoints -*z*:: +-z:: --skip-zero-records:: omit records with all zeros in logging mode -- GitLab From 258785ef08b323bddd844b4926a32c2b2045a1b0 Mon Sep 17 00:00:00 2001 From: David Matlack Date: Thu, 6 May 2021 15:24:43 +0000 Subject: [PATCH 0114/3804] kvm: Cap halt polling at kvm->max_halt_poll_ns When growing halt-polling, there is no check that the poll time exceeds the per-VM limit. It's possible for vcpu->halt_poll_ns to grow past kvm->max_halt_poll_ns and stay there until a halt which takes longer than kvm->halt_poll_ns. Signed-off-by: David Matlack Signed-off-by: Venkatesh Srinivas Message-Id: <20210506152442.4010298-1-venkateshs@chromium.org> Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b9f12da6af0ea..6b4feb92dc797 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2893,8 +2893,8 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu) if (val < grow_start) val = grow_start; - if (val > halt_poll_ns) - val = halt_poll_ns; + if (val > vcpu->kvm->max_halt_poll_ns) + val = vcpu->kvm->max_halt_poll_ns; vcpu->halt_poll_ns = val; out: -- GitLab From 368340a3c7d9a207bfe544721d464b7109be8eae Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 6 May 2021 16:15:42 -0700 Subject: [PATCH 0115/3804] KVM: SVM: Invert user pointer casting in SEV {en,de}crypt helpers Invert the user pointer params for SEV's helpers for encrypting and decrypting guest memory so that they take a pointer and cast to an unsigned long as necessary, as opposed to doing the opposite. Tagging a non-pointer as __user is confusing and weird since a cast of some form needs to occur to actually access the user data. This also fixes Sparse warnings triggered by directly consuming the unsigned longs, which are "noderef" due to the __user tag. Cc: Brijesh Singh Cc: Tom Lendacky Cc: Ashish Kalra Signed-off-by: Sean Christopherson Message-Id: <20210506231542.2331138-1-seanjc@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 8b11c711a0e40..eb241c1a4add3 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -763,7 +763,7 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr, } static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr, - unsigned long __user dst_uaddr, + void __user *dst_uaddr, unsigned long dst_paddr, int size, int *err) { @@ -787,8 +787,7 @@ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr, if (tpage) { offset = paddr & 15; - if (copy_to_user((void __user *)(uintptr_t)dst_uaddr, - page_address(tpage) + offset, size)) + if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size)) ret = -EFAULT; } @@ -800,9 +799,9 @@ e_free: } static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, - unsigned long __user vaddr, + void __user *vaddr, unsigned long dst_paddr, - unsigned long __user dst_vaddr, + void __user *dst_vaddr, int size, int *error) { struct page *src_tpage = NULL; @@ -810,13 +809,12 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, int ret, len = size; /* If source buffer is not aligned then use an intermediate buffer */ - if (!IS_ALIGNED(vaddr, 16)) { + if (!IS_ALIGNED((unsigned long)vaddr, 16)) { src_tpage = alloc_page(GFP_KERNEL); if (!src_tpage) return -ENOMEM; - if (copy_from_user(page_address(src_tpage), - (void __user *)(uintptr_t)vaddr, size)) { + if (copy_from_user(page_address(src_tpage), vaddr, size)) { __free_page(src_tpage); return -EFAULT; } @@ -830,7 +828,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, * - copy the source buffer in an intermediate buffer * - use the intermediate buffer as source buffer */ - if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { + if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) { int dst_offset; dst_tpage = alloc_page(GFP_KERNEL); @@ -855,7 +853,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr, page_address(src_tpage), size); else { if (copy_from_user(page_address(dst_tpage) + dst_offset, - (void __user *)(uintptr_t)vaddr, size)) { + vaddr, size)) { ret = -EFAULT; goto e_free; } @@ -935,15 +933,15 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec) if (dec) ret = __sev_dbg_decrypt_user(kvm, __sme_page_pa(src_p[0]) + s_off, - dst_vaddr, + (void __user *)dst_vaddr, __sme_page_pa(dst_p[0]) + d_off, len, &argp->error); else ret = __sev_dbg_encrypt_user(kvm, __sme_page_pa(src_p[0]) + s_off, - vaddr, + (void __user *)vaddr, __sme_page_pa(dst_p[0]) + d_off, - dst_vaddr, + (void __user *)dst_vaddr, len, &argp->error); sev_unpin_memory(kvm, src_p, n); -- GitLab From ce7ea0cfdc2e9ff31d12da31c3226deddb9644f5 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Thu, 6 May 2021 15:14:41 -0500 Subject: [PATCH 0116/3804] KVM: SVM: Move GHCB unmapping to fix RCU warning When an SEV-ES guest is running, the GHCB is unmapped as part of the vCPU run support. However, kvm_vcpu_unmap() triggers an RCU dereference warning with CONFIG_PROVE_LOCKING=y because the SRCU lock is released before invoking the vCPU run support. Move the GHCB unmapping into the prepare_guest_switch callback, which is invoked while still holding the SRCU lock, eliminating the RCU dereference warning. Fixes: 291bd20d5d88 ("KVM: SVM: Add initial support for a VMGEXIT VMEXIT") Reported-by: Borislav Petkov Signed-off-by: Tom Lendacky Message-Id: Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/sev.c | 5 +---- arch/x86/kvm/svm/svm.c | 3 +++ arch/x86/kvm/svm/svm.h | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index eb241c1a4add3..5bc887e9a9860 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2197,7 +2197,7 @@ vmgexit_err: return -EINVAL; } -static void pre_sev_es_run(struct vcpu_svm *svm) +void sev_es_unmap_ghcb(struct vcpu_svm *svm) { if (!svm->ghcb) return; @@ -2233,9 +2233,6 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) struct svm_cpu_data *sd = per_cpu(svm_data, cpu); int asid = sev_get_asid(svm->vcpu.kvm); - /* Perform any SEV-ES pre-run actions */ - pre_sev_es_run(svm); - /* Assign the asid allocated with this SEV guest */ svm->asid = asid; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 8124f51e9488f..4dd9b7856e5b1 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1437,6 +1437,9 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); + if (sev_es_guest(vcpu->kvm)) + sev_es_unmap_ghcb(svm); + if (svm->guest_state_loaded) return; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 84b3133c2251d..e44567ceb8655 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -581,6 +581,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm); void sev_es_create_vcpu(struct vcpu_svm *svm); void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu); +void sev_es_unmap_ghcb(struct vcpu_svm *svm); /* vmenter.S */ -- GitLab From db8e712e06874e37a1fdb9bb011618811fc96dbd Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 7 May 2021 12:09:03 +0300 Subject: [PATCH 0117/3804] bus: ti-sysc: Fix missing quirk flags for sata Naresh Kamboju reported that Beaglebone-X15 does not detect sata drives any longer after dra7 was flipped to boot with device tree data only. Turns out we are now missing the sata related quirk flags in ti-sysc that we used to have earlier. Fixes: 98feab31ac49 ("ARM: OMAP2+: Drop legacy platform data for dra7 sata") Fixes: 21206c8f2cb5 ("ARM: OMAP2+: Drop legacy platform data for omap5 sata") Link: https://lore.kernel.org/regressions/CA+G9fYtTN6ug3eBAW3wMcDeESUo+ebj7L5HBe5_fj4uqDExFQg@mail.gmail.com/ Reported-by: Naresh Kamboju Tested-by: Naresh Kamboju Signed-off-by: Tony Lindgren --- drivers/bus/ti-sysc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 8880259b41ae3..b3e7a6e8de716 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -1459,6 +1459,8 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = { SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY), SYSC_QUIRK("tptc", 0, 0, -ENODEV, -ENODEV, 0x40007c00, 0xffffffff, SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY), + SYSC_QUIRK("sata", 0, 0xfc, 0x1100, -ENODEV, 0x5e412000, 0xffffffff, + SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY), SYSC_QUIRK("usb_host_hs", 0, 0, 0x10, 0x14, 0x50700100, 0xffffffff, SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY), SYSC_QUIRK("usb_host_hs", 0, 0, 0x10, -ENODEV, 0x50700101, 0xffffffff, @@ -1524,7 +1526,6 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = { SYSC_QUIRK("prcm", 0, 0, -ENODEV, -ENODEV, 0x40000400, 0xffffffff, 0), SYSC_QUIRK("rfbi", 0x4832a800, 0, 0x10, 0x14, 0x00000010, 0xffffffff, 0), SYSC_QUIRK("rfbi", 0x58002000, 0, 0x10, 0x14, 0x00000010, 0xffffffff, 0), - SYSC_QUIRK("sata", 0, 0xfc, 0x1100, -ENODEV, 0x5e412000, 0xffffffff, 0), SYSC_QUIRK("scm", 0, 0, 0x10, -ENODEV, 0x40000900, 0xffffffff, 0), SYSC_QUIRK("scm", 0, 0, -ENODEV, -ENODEV, 0x4e8b0100, 0xffffffff, 0), SYSC_QUIRK("scm", 0, 0, -ENODEV, -ENODEV, 0x4f000100, 0xffffffff, 0), -- GitLab From ca66a6770bd9d6d99e469debd1c7363ac455daf9 Mon Sep 17 00:00:00 2001 From: Johnny Chuang Date: Tue, 13 Apr 2021 09:20:50 +0800 Subject: [PATCH 0118/3804] HID: i2c-hid: Skip ELAN power-on command after reset For ELAN touchscreen, we found our boot code of IC was not flexible enough to receive and handle this command. Once the FW main code of our controller is crashed for some reason, the controller could not be enumerated successfully to be recognized by the system host. therefore, it lost touch functionality. Add quirk for skip send power-on command after reset. It will impact to ELAN touchscreen and touchpad on HID over I2C projects. Fixes: 43b7029f475e ("HID: i2c-hid: Send power-on command after reset"). Cc: stable@vger.kernel.org Signed-off-by: Johnny Chuang Reviewed-by: Harry Cutts Reviewed-by: Douglas Anderson Tested-by: Douglas Anderson Signed-off-by: Benjamin Tissoires --- drivers/hid/i2c-hid/i2c-hid-core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index 9993133989a58..ce91b1e57876d 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -45,6 +45,7 @@ #define I2C_HID_QUIRK_BOGUS_IRQ BIT(4) #define I2C_HID_QUIRK_RESET_ON_RESUME BIT(5) #define I2C_HID_QUIRK_BAD_INPUT_SIZE BIT(6) +#define I2C_HID_QUIRK_NO_WAKEUP_AFTER_RESET BIT(7) /* flags */ @@ -178,6 +179,11 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_RESET_ON_RESUME }, { USB_VENDOR_ID_ITE, I2C_DEVICE_ID_ITE_LENOVO_LEGION_Y720, I2C_HID_QUIRK_BAD_INPUT_SIZE }, + /* + * Sending the wakeup after reset actually break ELAN touchscreen controller + */ + { USB_VENDOR_ID_ELAN, HID_ANY_ID, + I2C_HID_QUIRK_NO_WAKEUP_AFTER_RESET }, { 0, 0 } }; @@ -461,7 +467,8 @@ static int i2c_hid_hwreset(struct i2c_client *client) } /* At least some SIS devices need this after reset */ - ret = i2c_hid_set_power(client, I2C_HID_PWR_ON); + if (!(ihid->quirks & I2C_HID_QUIRK_NO_WAKEUP_AFTER_RESET)) + ret = i2c_hid_set_power(client, I2C_HID_PWR_ON); out_unlock: mutex_unlock(&ihid->reset_lock); -- GitLab From 698ab77aebffe08b312fbcdddeb0e8bd08b78717 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 28 Apr 2021 15:03:12 -0400 Subject: [PATCH 0119/3804] dax: Add an enum for specifying dax wakup mode Dan mentioned that he is not very fond of passing around a boolean true/false to specify if only next waiter should be woken up or all waiters should be woken up. He instead prefers that we introduce an enum and make it very explicity at the callsite itself. Easier to read code. This patch should not introduce any change of behavior. Reviewed-by: Greg Kurz Reviewed-by: Jan Kara Suggested-by: Dan Williams Signed-off-by: Vivek Goyal Link: https://lore.kernel.org/r/20210428190314.1865312-2-vgoyal@redhat.com Signed-off-by: Dan Williams --- fs/dax.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index b3d27fdc67752..5ecee51c44ee7 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -144,6 +144,16 @@ struct wait_exceptional_entry_queue { struct exceptional_entry_key key; }; +/** + * enum dax_wake_mode: waitqueue wakeup behaviour + * @WAKE_ALL: wake all waiters in the waitqueue + * @WAKE_NEXT: wake only the first waiter in the waitqueue + */ +enum dax_wake_mode { + WAKE_ALL, + WAKE_NEXT, +}; + static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas, void *entry, struct exceptional_entry_key *key) { @@ -182,7 +192,8 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait, * The important information it's conveying is whether the entry at * this index used to be a PMD entry. */ -static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all) +static void dax_wake_entry(struct xa_state *xas, void *entry, + enum dax_wake_mode mode) { struct exceptional_entry_key key; wait_queue_head_t *wq; @@ -196,7 +207,7 @@ static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all) * must be in the waitqueue and the following check will see them. */ if (waitqueue_active(wq)) - __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key); + __wake_up(wq, TASK_NORMAL, mode == WAKE_ALL ? 0 : 1, &key); } /* @@ -268,7 +279,7 @@ static void put_unlocked_entry(struct xa_state *xas, void *entry) { /* If we were the only waiter woken, wake the next one */ if (entry && !dax_is_conflict(entry)) - dax_wake_entry(xas, entry, false); + dax_wake_entry(xas, entry, WAKE_NEXT); } /* @@ -286,7 +297,7 @@ static void dax_unlock_entry(struct xa_state *xas, void *entry) old = xas_store(xas, entry); xas_unlock_irq(xas); BUG_ON(!dax_is_locked(old)); - dax_wake_entry(xas, entry, false); + dax_wake_entry(xas, entry, WAKE_NEXT); } /* @@ -524,7 +535,7 @@ retry: dax_disassociate_entry(entry, mapping, false); xas_store(xas, NULL); /* undo the PMD join */ - dax_wake_entry(xas, entry, true); + dax_wake_entry(xas, entry, WAKE_ALL); mapping->nrexceptional--; entry = NULL; xas_set(xas, index); @@ -937,7 +948,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev, xas_lock_irq(xas); xas_store(xas, entry); xas_clear_mark(xas, PAGECACHE_TAG_DIRTY); - dax_wake_entry(xas, entry, false); + dax_wake_entry(xas, entry, WAKE_NEXT); trace_dax_writeback_one(mapping->host, index, count); return ret; -- GitLab From 4c3d043d271d4d629aa2328796cdfc96b37d3b3c Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 28 Apr 2021 15:03:13 -0400 Subject: [PATCH 0120/3804] dax: Add a wakeup mode parameter to put_unlocked_entry() As of now put_unlocked_entry() always wakes up next waiter. In next patches we want to wake up all waiters at one callsite. Hence, add a parameter to the function. This patch does not introduce any change of behavior. Reviewed-by: Greg Kurz Reviewed-by: Jan Kara Suggested-by: Dan Williams Signed-off-by: Vivek Goyal Link: https://lore.kernel.org/r/20210428190314.1865312-3-vgoyal@redhat.com Signed-off-by: Dan Williams --- fs/dax.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 5ecee51c44ee7..56eb1c759ca5c 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -275,11 +275,11 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry) finish_wait(wq, &ewait.wait); } -static void put_unlocked_entry(struct xa_state *xas, void *entry) +static void put_unlocked_entry(struct xa_state *xas, void *entry, + enum dax_wake_mode mode) { - /* If we were the only waiter woken, wake the next one */ if (entry && !dax_is_conflict(entry)) - dax_wake_entry(xas, entry, WAKE_NEXT); + dax_wake_entry(xas, entry, mode); } /* @@ -633,7 +633,7 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, entry = get_unlocked_entry(&xas, 0); if (entry) page = dax_busy_page(entry); - put_unlocked_entry(&xas, entry); + put_unlocked_entry(&xas, entry, WAKE_NEXT); if (page) break; if (++scanned % XA_CHECK_SCHED) @@ -675,7 +675,7 @@ static int __dax_invalidate_entry(struct address_space *mapping, mapping->nrexceptional--; ret = 1; out: - put_unlocked_entry(&xas, entry); + put_unlocked_entry(&xas, entry, WAKE_NEXT); xas_unlock_irq(&xas); return ret; } @@ -954,7 +954,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev, return ret; put_unlocked: - put_unlocked_entry(xas, entry); + put_unlocked_entry(xas, entry, WAKE_NEXT); return ret; } @@ -1695,7 +1695,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order) /* Did we race with someone splitting entry or so? */ if (!entry || dax_is_conflict(entry) || (order == 0 && !dax_is_pte_entry(entry))) { - put_unlocked_entry(&xas, entry); + put_unlocked_entry(&xas, entry, WAKE_NEXT); xas_unlock_irq(&xas); trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf, VM_FAULT_NOPAGE); -- GitLab From 237388320deffde7c2d65ed8fc9eef670dc979b3 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 28 Apr 2021 15:03:14 -0400 Subject: [PATCH 0121/3804] dax: Wake up all waiters after invalidating dax entry I am seeing missed wakeups which ultimately lead to a deadlock when I am using virtiofs with DAX enabled and running "make -j". I had to mount virtiofs as rootfs and also reduce to dax window size to 256M to reproduce the problem consistently. So here is the problem. put_unlocked_entry() wakes up waiters only if entry is not null as well as !dax_is_conflict(entry). But if I call multiple instances of invalidate_inode_pages2() in parallel, then I can run into a situation where there are waiters on this index but nobody will wake these waiters. invalidate_inode_pages2() invalidate_inode_pages2_range() invalidate_exceptional_entry2() dax_invalidate_mapping_entry_sync() __dax_invalidate_entry() { xas_lock_irq(&xas); entry = get_unlocked_entry(&xas, 0); ... ... dax_disassociate_entry(entry, mapping, trunc); xas_store(&xas, NULL); ... ... put_unlocked_entry(&xas, entry); xas_unlock_irq(&xas); } Say a fault in in progress and it has locked entry at offset say "0x1c". Now say three instances of invalidate_inode_pages2() are in progress (A, B, C) and they all try to invalidate entry at offset "0x1c". Given dax entry is locked, all tree instances A, B, C will wait in wait queue. When dax fault finishes, say A is woken up. It will store NULL entry at index "0x1c" and wake up B. When B comes along it will find "entry=0" at page offset 0x1c and it will call put_unlocked_entry(&xas, 0). And this means put_unlocked_entry() will not wake up next waiter, given the current code. And that means C continues to wait and is not woken up. This patch fixes the issue by waking up all waiters when a dax entry has been invalidated. This seems to fix the deadlock I am facing and I can make forward progress. Reported-by: Sergio Lopez Fixes: ac401cc78242 ("dax: New fault locking") Reviewed-by: Jan Kara Suggested-by: Dan Williams Signed-off-by: Vivek Goyal Link: https://lore.kernel.org/r/20210428190314.1865312-4-vgoyal@redhat.com Signed-off-by: Dan Williams --- fs/dax.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dax.c b/fs/dax.c index 56eb1c759ca5c..df5485b4bddf1 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -675,7 +675,7 @@ static int __dax_invalidate_entry(struct address_space *mapping, mapping->nrexceptional--; ret = 1; out: - put_unlocked_entry(&xas, entry, WAKE_NEXT); + put_unlocked_entry(&xas, entry, WAKE_ALL); xas_unlock_irq(&xas); return ret; } -- GitLab From 285c0faddcebdf360412fc9ef9cde63cf98da7f6 Mon Sep 17 00:00:00 2001 From: Bharat Jauhari Date: Thu, 25 Mar 2021 18:15:40 +0200 Subject: [PATCH 0122/3804] habanalabs: expose ASIC specific PLL index Currently the user cannot interpret the PLL information based on index as its exposed as an integer. This commit exposes ASIC specific PLL indexes and maps it to a generic FW compatible index. Signed-off-by: Bharat Jauhari Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 34 +++++++----- drivers/misc/habanalabs/common/habanalabs.h | 16 +++--- drivers/misc/habanalabs/common/sysfs.c | 4 +- drivers/misc/habanalabs/gaudi/gaudi.c | 55 +++++++------------- drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c | 12 ++--- drivers/misc/habanalabs/goya/goya.c | 47 +++++++---------- drivers/misc/habanalabs/goya/goya_hwmgr.c | 40 +++++++------- include/uapi/misc/habanalabs.h | 33 ++++++++++++ 8 files changed, 127 insertions(+), 114 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 832dd5c5bb065..7cf82da67dabf 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -661,18 +661,13 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy) return rc; } -int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, +int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index, enum pll_index *pll_index) { struct asic_fixed_properties *prop = &hdev->asic_prop; u8 pll_byte, pll_bit_off; bool dynamic_pll; - - if (input_pll_index >= PLL_MAX) { - dev_err(hdev->dev, "PLL index %d is out of range\n", - input_pll_index); - return -EINVAL; - } + int fw_pll_idx; dynamic_pll = prop->fw_security_status_valid && (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_DYN_PLL_EN); @@ -680,28 +675,39 @@ int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, if (!dynamic_pll) { /* * in case we are working with legacy FW (each asic has unique - * PLL numbering) extract the legacy numbering + * PLL numbering) use the driver based index as they are + * aligned with fw legacy numbering */ - *pll_index = hdev->legacy_pll_map[input_pll_index]; + *pll_index = input_pll_index; return 0; } + /* retrieve a FW compatible PLL index based on + * ASIC specific user request + */ + fw_pll_idx = hdev->asic_funcs->map_pll_idx_to_fw_idx(input_pll_index); + if (fw_pll_idx < 0) { + dev_err(hdev->dev, "Invalid PLL index (%u) error %d\n", + input_pll_index, fw_pll_idx); + return -EINVAL; + } + /* PLL map is a u8 array */ - pll_byte = prop->cpucp_info.pll_map[input_pll_index >> 3]; - pll_bit_off = input_pll_index & 0x7; + pll_byte = prop->cpucp_info.pll_map[fw_pll_idx >> 3]; + pll_bit_off = fw_pll_idx & 0x7; if (!(pll_byte & BIT(pll_bit_off))) { dev_err(hdev->dev, "PLL index %d is not supported\n", - input_pll_index); + fw_pll_idx); return -EINVAL; } - *pll_index = input_pll_index; + *pll_index = fw_pll_idx; return 0; } -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index, u16 *pll_freq_arr) { struct cpucp_packet pkt; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 44e89da30b4a7..91291a8e201ee 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -930,6 +930,9 @@ enum div_select_defs { * driver is ready to receive asynchronous events. This * function should be called during the first init and * after every hard-reset of the device + * @get_msi_info: Retrieve asic-specific MSI ID of the f/w async event + * @map_pll_idx_to_fw_idx: convert driver specific per asic PLL index to + * generic f/w compatible PLL Indexes */ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); @@ -1054,6 +1057,7 @@ struct hl_asic_funcs { u32 block_id, u32 block_size); void (*enable_events_from_fw)(struct hl_device *hdev); void (*get_msi_info)(u32 *table); + int (*map_pll_idx_to_fw_idx)(u32 pll_idx); }; @@ -1950,8 +1954,6 @@ struct hl_mmu_funcs { * @aggregated_cs_counters: aggregated cs counters among all contexts * @mmu_priv: device-specific MMU data. * @mmu_func: device-related MMU functions. - * @legacy_pll_map: map holding map between dynamic (common) PLL indexes and - * static (asic specific) PLL indexes. * @dram_used_mem: current DRAM memory consumption. * @timeout_jiffies: device CS timeout value. * @max_power: the max power of the device, as configured by the sysadmin. This @@ -2071,8 +2073,6 @@ struct hl_device { struct hl_mmu_priv mmu_priv; struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS]; - enum pll_index *legacy_pll_map; - atomic64_t dram_used_mem; u64 timeout_jiffies; u64 max_power; @@ -2387,9 +2387,9 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev, struct hl_info_pci_counters *counters); int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy); -int get_used_pll_index(struct hl_device *hdev, enum pll_index input_pll_index, +int get_used_pll_index(struct hl_device *hdev, u32 input_pll_index, enum pll_index *pll_index); -int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, enum pll_index pll_index, +int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u32 pll_index, u16 *pll_freq_arr); int hl_fw_cpucp_power_get(struct hl_device *hdev, u64 *power); int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg, @@ -2411,9 +2411,9 @@ int hl_pci_set_outbound_region(struct hl_device *hdev, int hl_pci_init(struct hl_device *hdev); void hl_pci_fini(struct hl_device *hdev); -long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index, +long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr); -void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index, +void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq); int hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr, long *value); diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c index 9fa61573a89de..c9f649b31e3a9 100644 --- a/drivers/misc/habanalabs/common/sysfs.c +++ b/drivers/misc/habanalabs/common/sysfs.c @@ -9,7 +9,7 @@ #include -long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index, +long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr) { struct cpucp_packet pkt; @@ -44,7 +44,7 @@ long hl_get_frequency(struct hl_device *hdev, enum pll_index pll_index, return (long) result; } -void hl_set_frequency(struct hl_device *hdev, enum pll_index pll_index, +void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq) { struct cpucp_packet pkt; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index b751652f80a8c..81155f06c126e 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -105,36 +105,6 @@ #define GAUDI_PLL_MAX 10 -/* - * this enum kept here for compatibility with old FW (in which each asic has - * unique PLL numbering - */ -enum gaudi_pll_index { - GAUDI_CPU_PLL = 0, - GAUDI_PCI_PLL, - GAUDI_SRAM_PLL, - GAUDI_HBM_PLL, - GAUDI_NIC_PLL, - GAUDI_DMA_PLL, - GAUDI_MESH_PLL, - GAUDI_MME_PLL, - GAUDI_TPC_PLL, - GAUDI_IF_PLL, -}; - -static enum pll_index gaudi_pll_map[PLL_MAX] = { - [CPU_PLL] = GAUDI_CPU_PLL, - [PCI_PLL] = GAUDI_PCI_PLL, - [SRAM_PLL] = GAUDI_SRAM_PLL, - [HBM_PLL] = GAUDI_HBM_PLL, - [NIC_PLL] = GAUDI_NIC_PLL, - [DMA_PLL] = GAUDI_DMA_PLL, - [MESH_PLL] = GAUDI_MESH_PLL, - [MME_PLL] = GAUDI_MME_PLL, - [TPC_PLL] = GAUDI_TPC_PLL, - [IF_PLL] = GAUDI_IF_PLL, -}; - static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = { "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3", "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3", @@ -810,7 +780,7 @@ static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) freq = 0; } } else { - rc = hl_fw_cpucp_pll_info_get(hdev, CPU_PLL, pll_freq_arr); + rc = hl_fw_cpucp_pll_info_get(hdev, HL_GAUDI_CPU_PLL, pll_freq_arr); if (rc) return rc; @@ -1652,9 +1622,6 @@ static int gaudi_sw_init(struct hl_device *hdev) hdev->asic_specific = gaudi; - /* store legacy PLL map */ - hdev->legacy_pll_map = gaudi_pll_map; - /* Create DMA pool for small allocations */ hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0); @@ -8783,6 +8750,23 @@ static void gaudi_enable_events_from_fw(struct hl_device *hdev) WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER); } +static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx) +{ + switch (pll_idx) { + case HL_GAUDI_CPU_PLL: return CPU_PLL; + case HL_GAUDI_PCI_PLL: return PCI_PLL; + case HL_GAUDI_NIC_PLL: return NIC_PLL; + case HL_GAUDI_DMA_PLL: return DMA_PLL; + case HL_GAUDI_MESH_PLL: return MESH_PLL; + case HL_GAUDI_MME_PLL: return MME_PLL; + case HL_GAUDI_TPC_PLL: return TPC_PLL; + case HL_GAUDI_IF_PLL: return IF_PLL; + case HL_GAUDI_SRAM_PLL: return SRAM_PLL; + case HL_GAUDI_HBM_PLL: return HBM_PLL; + default: return -EINVAL; + } +} + static const struct hl_asic_funcs gaudi_funcs = { .early_init = gaudi_early_init, .early_fini = gaudi_early_fini, @@ -8866,7 +8850,8 @@ static const struct hl_asic_funcs gaudi_funcs = { .ack_protection_bits_errors = gaudi_ack_protection_bits_errors, .get_hw_block_id = gaudi_get_hw_block_id, .hw_block_mmap = gaudi_block_mmap, - .enable_events_from_fw = gaudi_enable_events_from_fw + .enable_events_from_fw = gaudi_enable_events_from_fw, + .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx }; /** diff --git a/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c b/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c index 8c49da4bcbd58..9b60eadd4c355 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_hwmgr.c @@ -13,7 +13,7 @@ void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq) struct gaudi_device *gaudi = hdev->asic_specific; if (freq == PLL_LAST) - hl_set_frequency(hdev, MME_PLL, gaudi->max_freq_value); + hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value); } int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) @@ -23,7 +23,7 @@ int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, MME_PLL, false); + value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false); if (value < 0) { dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", @@ -33,7 +33,7 @@ int gaudi_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) *max_clk = (value / 1000 / 1000); - value = hl_get_frequency(hdev, MME_PLL, true); + value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true); if (value < 0) { dev_err(hdev->dev, @@ -57,7 +57,7 @@ static ssize_t clk_max_freq_mhz_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, MME_PLL, false); + value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, false); gaudi->max_freq_value = value; @@ -85,7 +85,7 @@ static ssize_t clk_max_freq_mhz_store(struct device *dev, gaudi->max_freq_value = value * 1000 * 1000; - hl_set_frequency(hdev, MME_PLL, gaudi->max_freq_value); + hl_set_frequency(hdev, HL_GAUDI_MME_PLL, gaudi->max_freq_value); fail: return count; @@ -100,7 +100,7 @@ static ssize_t clk_cur_freq_mhz_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, MME_PLL, true); + value = hl_get_frequency(hdev, HL_GAUDI_MME_PLL, true); return sprintf(buf, "%lu\n", (value / 1000 / 1000)); } diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index e27338f4aad2f..e0ad2a269779b 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -118,30 +118,6 @@ #define IS_MME_IDLE(mme_arch_sts) \ (((mme_arch_sts) & MME_ARCH_IDLE_MASK) == MME_ARCH_IDLE_MASK) -/* - * this enum kept here for compatibility with old FW (in which each asic has - * unique PLL numbering - */ -enum goya_pll_index { - GOYA_CPU_PLL = 0, - GOYA_IC_PLL, - GOYA_MC_PLL, - GOYA_MME_PLL, - GOYA_PCI_PLL, - GOYA_EMMC_PLL, - GOYA_TPC_PLL, -}; - -static enum pll_index goya_pll_map[PLL_MAX] = { - [CPU_PLL] = GOYA_CPU_PLL, - [IC_PLL] = GOYA_IC_PLL, - [MC_PLL] = GOYA_MC_PLL, - [MME_PLL] = GOYA_MME_PLL, - [PCI_PLL] = GOYA_PCI_PLL, - [EMMC_PLL] = GOYA_EMMC_PLL, - [TPC_PLL] = GOYA_TPC_PLL, -}; - static const char goya_irq_name[GOYA_MSIX_ENTRIES][GOYA_MAX_STRING_LEN] = { "goya cq 0", "goya cq 1", "goya cq 2", "goya cq 3", "goya cq 4", "goya cpu eq" @@ -775,7 +751,8 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev) freq = 0; } } else { - rc = hl_fw_cpucp_pll_info_get(hdev, PCI_PLL, pll_freq_arr); + rc = hl_fw_cpucp_pll_info_get(hdev, HL_GOYA_PCI_PLL, + pll_freq_arr); if (rc) return; @@ -897,9 +874,6 @@ static int goya_sw_init(struct hl_device *hdev) hdev->asic_specific = goya; - /* store legacy PLL map */ - hdev->legacy_pll_map = goya_pll_map; - /* Create DMA pool for small allocations */ hdev->dma_pool = dma_pool_create(dev_name(hdev->dev), &hdev->pdev->dev, GOYA_DMA_POOL_BLK_SIZE, 8, 0); @@ -5512,6 +5486,20 @@ static void goya_enable_events_from_fw(struct hl_device *hdev) GOYA_ASYNC_EVENT_ID_INTS_REGISTER); } +static int goya_map_pll_idx_to_fw_idx(u32 pll_idx) +{ + switch (pll_idx) { + case HL_GOYA_CPU_PLL: return CPU_PLL; + case HL_GOYA_PCI_PLL: return PCI_PLL; + case HL_GOYA_MME_PLL: return MME_PLL; + case HL_GOYA_TPC_PLL: return TPC_PLL; + case HL_GOYA_IC_PLL: return IC_PLL; + case HL_GOYA_MC_PLL: return MC_PLL; + case HL_GOYA_EMMC_PLL: return EMMC_PLL; + default: return -EINVAL; + } +} + static const struct hl_asic_funcs goya_funcs = { .early_init = goya_early_init, .early_fini = goya_early_fini, @@ -5595,7 +5583,8 @@ static const struct hl_asic_funcs goya_funcs = { .ack_protection_bits_errors = goya_ack_protection_bits_errors, .get_hw_block_id = goya_get_hw_block_id, .hw_block_mmap = goya_block_mmap, - .enable_events_from_fw = goya_enable_events_from_fw + .enable_events_from_fw = goya_enable_events_from_fw, + .map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx }; /* diff --git a/drivers/misc/habanalabs/goya/goya_hwmgr.c b/drivers/misc/habanalabs/goya/goya_hwmgr.c index 3acb36a1a902e..7d007125727ff 100644 --- a/drivers/misc/habanalabs/goya/goya_hwmgr.c +++ b/drivers/misc/habanalabs/goya/goya_hwmgr.c @@ -13,19 +13,19 @@ void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq) switch (freq) { case PLL_HIGH: - hl_set_frequency(hdev, MME_PLL, hdev->high_pll); - hl_set_frequency(hdev, TPC_PLL, hdev->high_pll); - hl_set_frequency(hdev, IC_PLL, hdev->high_pll); + hl_set_frequency(hdev, HL_GOYA_MME_PLL, hdev->high_pll); + hl_set_frequency(hdev, HL_GOYA_TPC_PLL, hdev->high_pll); + hl_set_frequency(hdev, HL_GOYA_IC_PLL, hdev->high_pll); break; case PLL_LOW: - hl_set_frequency(hdev, MME_PLL, GOYA_PLL_FREQ_LOW); - hl_set_frequency(hdev, TPC_PLL, GOYA_PLL_FREQ_LOW); - hl_set_frequency(hdev, IC_PLL, GOYA_PLL_FREQ_LOW); + hl_set_frequency(hdev, HL_GOYA_MME_PLL, GOYA_PLL_FREQ_LOW); + hl_set_frequency(hdev, HL_GOYA_TPC_PLL, GOYA_PLL_FREQ_LOW); + hl_set_frequency(hdev, HL_GOYA_IC_PLL, GOYA_PLL_FREQ_LOW); break; case PLL_LAST: - hl_set_frequency(hdev, MME_PLL, goya->mme_clk); - hl_set_frequency(hdev, TPC_PLL, goya->tpc_clk); - hl_set_frequency(hdev, IC_PLL, goya->ic_clk); + hl_set_frequency(hdev, HL_GOYA_MME_PLL, goya->mme_clk); + hl_set_frequency(hdev, HL_GOYA_TPC_PLL, goya->tpc_clk); + hl_set_frequency(hdev, HL_GOYA_IC_PLL, goya->ic_clk); break; default: dev_err(hdev->dev, "unknown frequency setting\n"); @@ -39,7 +39,7 @@ int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, MME_PLL, false); + value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false); if (value < 0) { dev_err(hdev->dev, "Failed to retrieve device max clock %ld\n", @@ -49,7 +49,7 @@ int goya_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk) *max_clk = (value / 1000 / 1000); - value = hl_get_frequency(hdev, MME_PLL, true); + value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true); if (value < 0) { dev_err(hdev->dev, @@ -72,7 +72,7 @@ static ssize_t mme_clk_show(struct device *dev, struct device_attribute *attr, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, MME_PLL, false); + value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, false); if (value < 0) return value; @@ -105,7 +105,7 @@ static ssize_t mme_clk_store(struct device *dev, struct device_attribute *attr, goto fail; } - hl_set_frequency(hdev, MME_PLL, value); + hl_set_frequency(hdev, HL_GOYA_MME_PLL, value); goya->mme_clk = value; fail: @@ -121,7 +121,7 @@ static ssize_t tpc_clk_show(struct device *dev, struct device_attribute *attr, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, TPC_PLL, false); + value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, false); if (value < 0) return value; @@ -154,7 +154,7 @@ static ssize_t tpc_clk_store(struct device *dev, struct device_attribute *attr, goto fail; } - hl_set_frequency(hdev, TPC_PLL, value); + hl_set_frequency(hdev, HL_GOYA_TPC_PLL, value); goya->tpc_clk = value; fail: @@ -170,7 +170,7 @@ static ssize_t ic_clk_show(struct device *dev, struct device_attribute *attr, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, IC_PLL, false); + value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, false); if (value < 0) return value; @@ -203,7 +203,7 @@ static ssize_t ic_clk_store(struct device *dev, struct device_attribute *attr, goto fail; } - hl_set_frequency(hdev, IC_PLL, value); + hl_set_frequency(hdev, HL_GOYA_IC_PLL, value); goya->ic_clk = value; fail: @@ -219,7 +219,7 @@ static ssize_t mme_clk_curr_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, MME_PLL, true); + value = hl_get_frequency(hdev, HL_GOYA_MME_PLL, true); if (value < 0) return value; @@ -236,7 +236,7 @@ static ssize_t tpc_clk_curr_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, TPC_PLL, true); + value = hl_get_frequency(hdev, HL_GOYA_TPC_PLL, true); if (value < 0) return value; @@ -253,7 +253,7 @@ static ssize_t ic_clk_curr_show(struct device *dev, if (!hl_device_operational(hdev, NULL)) return -ENODEV; - value = hl_get_frequency(hdev, IC_PLL, true); + value = hl_get_frequency(hdev, HL_GOYA_IC_PLL, true); if (value < 0) return value; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index d3e017b5f0dba..6d2d34c9f375f 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -239,6 +239,39 @@ enum gaudi_engine_id { GAUDI_ENGINE_ID_SIZE }; +/* + * ASIC specific PLL index + * + * Used to retrieve in frequency info of different IPs via + * HL_INFO_PLL_FREQUENCY under HL_IOCTL_INFO IOCTL. The enums need to be + * used as an index in struct hl_pll_frequency_info + */ + +enum hl_goya_pll_index { + HL_GOYA_CPU_PLL = 0, + HL_GOYA_IC_PLL, + HL_GOYA_MC_PLL, + HL_GOYA_MME_PLL, + HL_GOYA_PCI_PLL, + HL_GOYA_EMMC_PLL, + HL_GOYA_TPC_PLL, + HL_GOYA_PLL_MAX +}; + +enum hl_gaudi_pll_index { + HL_GAUDI_CPU_PLL = 0, + HL_GAUDI_PCI_PLL, + HL_GAUDI_SRAM_PLL, + HL_GAUDI_HBM_PLL, + HL_GAUDI_NIC_PLL, + HL_GAUDI_DMA_PLL, + HL_GAUDI_MESH_PLL, + HL_GAUDI_MME_PLL, + HL_GAUDI_TPC_PLL, + HL_GAUDI_IF_PLL, + HL_GAUDI_PLL_MAX +}; + enum hl_device_status { HL_DEVICE_STATUS_OPERATIONAL, HL_DEVICE_STATUS_IN_RESET, -- GitLab From 001d5f66c156f2c30b6bf85346de09de8db49b59 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 11 Apr 2021 21:06:05 +0300 Subject: [PATCH 0123/3804] habanalabs: skip reading f/w errors on bad status If we read all FF from the boot status register, then something is totally wrong and there is no point of reading specific errors. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 7cf82da67dabf..fff29f057b6d3 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -850,8 +850,13 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg, if (rc) { dev_err(hdev->dev, "Failed to read preboot version\n"); detect_cpu_boot_status(hdev, status); - fw_read_errors(hdev, boot_err0_reg, - cpu_security_boot_status_reg); + + /* If we read all FF, then something is totally wrong, no point + * of reading specific errors + */ + if (status != -1) + fw_read_errors(hdev, boot_err0_reg, + cpu_security_boot_status_reg); return -EIO; } -- GitLab From b5fd82a7af198db04408e218f64dc3d4178d585a Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 12 Apr 2021 09:38:22 +0300 Subject: [PATCH 0124/3804] habanalabs: change error level of security not ready This error indicates a problem in the security initialization inside the f/w so we need to stop the device loading because it won't be usable. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index fff29f057b6d3..377a7ca886feb 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -362,12 +362,9 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg, } if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY) { - dev_warn(hdev->dev, + dev_err(hdev->dev, "Device boot warning - security not ready\n"); - /* This is a warning so we don't want it to disable the - * device - */ - err_val &= ~CPU_BOOT_ERR0_SECURITY_NOT_RDY; + err_exists = true; } if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL) { -- GitLab From 27a9e35daad080f3770401a1a11eda2f9f7732dd Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 12 Apr 2021 09:52:05 +0300 Subject: [PATCH 0125/3804] habanalabs: ignore f/w status error In case firmware has a bug and erroneously reports a status error (e.g. device unusable) during boot, allow the user to tell the driver to continue the boot regardless of the error status. This will be done via kernel parameter which exposes a mask. The user that loads the driver can decide exactly which status error to ignore and which to take into account. The bitmask is according to defines in hl_boot_if.h Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 3 ++- drivers/misc/habanalabs/common/habanalabs.h | 7 +++++++ drivers/misc/habanalabs/common/habanalabs_drv.c | 7 +++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index 377a7ca886feb..0713b2c12d54f 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -400,7 +400,8 @@ static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg, err_exists = true; } - if (err_exists) + if (err_exists && ((err_val & ~CPU_BOOT_ERR0_ENABLED) & + lower_32_bits(hdev->boot_error_status_mask))) return -EIO; return 0; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 91291a8e201ee..6579f8767abda 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1962,6 +1962,12 @@ struct hl_mmu_funcs { * @clock_gating_mask: is clock gating enabled. bitmask that represents the * different engines. See debugfs-driver-habanalabs for * details. + * @boot_error_status_mask: contains a mask of the device boot error status. + * Each bit represents a different error, according to + * the defines in hl_boot_if.h. If the bit is cleared, + * the error will be ignored by the driver during + * device initialization. Mainly used to debug and + * workaround firmware bugs * @in_reset: is device in reset flow. * @curr_pll_profile: current PLL profile. * @card_type: Various ASICs have several card types. This indicates the card @@ -2077,6 +2083,7 @@ struct hl_device { u64 timeout_jiffies; u64 max_power; u64 clock_gating_mask; + u64 boot_error_status_mask; atomic_t in_reset; enum hl_pll_frequency curr_pll_profile; enum cpucp_card_types card_type; diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c index 7135f1e038641..64d1530db9854 100644 --- a/drivers/misc/habanalabs/common/habanalabs_drv.c +++ b/drivers/misc/habanalabs/common/habanalabs_drv.c @@ -30,6 +30,7 @@ static DEFINE_MUTEX(hl_devs_idr_lock); static int timeout_locked = 30; static int reset_on_lockup = 1; static int memory_scrub = 1; +static ulong boot_error_status_mask = ULONG_MAX; module_param(timeout_locked, int, 0444); MODULE_PARM_DESC(timeout_locked, @@ -43,6 +44,10 @@ module_param(memory_scrub, int, 0444); MODULE_PARM_DESC(memory_scrub, "Scrub device memory in various states (0 = no, 1 = yes, default yes)"); +module_param(boot_error_status_mask, ulong, 0444); +MODULE_PARM_DESC(boot_error_status_mask, + "Mask of the error status during device CPU boot (If bitX is cleared then error X is masked. Default all 1's)"); + #define PCI_VENDOR_ID_HABANALABS 0x1da3 #define PCI_IDS_GOYA 0x0001 @@ -319,6 +324,8 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev, hdev->major = hl_major; hdev->reset_on_lockup = reset_on_lockup; hdev->memory_scrub = memory_scrub; + hdev->boot_error_status_mask = boot_error_status_mask; + hdev->pldm = 0; set_driver_behavior_per_device(hdev); -- GitLab From 24a107097fbd8fb6a48a0dcb31e64c1de6831a1d Mon Sep 17 00:00:00 2001 From: Ofir Bitton Date: Tue, 27 Apr 2021 17:49:25 +0300 Subject: [PATCH 0126/3804] habanalabs: wait for interrupt wrong timeout calculation Wait for interrupt timeout calculation is wrong, hence timeout occurs when user waits on an interrupt with certain timeout values. Signed-off-by: Ofir Bitton Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_submission.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index ff8791a651fd1..af3c497defb10 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -2017,7 +2017,7 @@ wait_again: if (completion_value >= target_value) { *status = CS_WAIT_STATUS_COMPLETED; } else { - timeout -= jiffies_to_usecs(completion_rc); + timeout = completion_rc; goto wait_again; } } else { -- GitLab From 115726c5d312b462c9d9931ea42becdfa838a076 Mon Sep 17 00:00:00 2001 From: Lv Yunlong Date: Mon, 26 Apr 2021 06:43:46 -0700 Subject: [PATCH 0127/3804] habanalabs/gaudi: Fix a potential use after free in gaudi_memset_device_memory Our code analyzer reported a uaf. In gaudi_memset_device_memory, cb is get via hl_cb_kernel_create() with 2 refcount. If hl_cs_allocate_job() failed, the execution runs into release_cb branch. One ref of cb is dropped by hl_cb_put(cb) and could be freed if other thread also drops one ref. Then cb is used by cb->id later, which is a potential uaf. My patch add a variable 'id' to accept the value of cb->id before the hl_cb_put(cb) is called, to avoid the potential uaf. Fixes: 423815bf02e25 ("habanalabs/gaudi: remove PCI access to SM block") Signed-off-by: Lv Yunlong Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/gaudi/gaudi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 81155f06c126e..9e4a6bb3acd11 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -5579,6 +5579,7 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, struct hl_cs_job *job; u32 cb_size, ctl, err_cause; struct hl_cb *cb; + u64 id; int rc; cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); @@ -5645,8 +5646,9 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, } release_cb: + id = cb->id; hl_cb_put(cb); - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT); return rc; } -- GitLab From a298232ee6b9a1d5d732aa497ff8be0d45b5bd82 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 7 May 2021 21:06:38 +0100 Subject: [PATCH 0128/3804] io_uring: fix link timeout refs WARNING: CPU: 0 PID: 10242 at lib/refcount.c:28 refcount_warn_saturate+0x15b/0x1a0 lib/refcount.c:28 RIP: 0010:refcount_warn_saturate+0x15b/0x1a0 lib/refcount.c:28 Call Trace: __refcount_sub_and_test include/linux/refcount.h:283 [inline] __refcount_dec_and_test include/linux/refcount.h:315 [inline] refcount_dec_and_test include/linux/refcount.h:333 [inline] io_put_req fs/io_uring.c:2140 [inline] io_queue_linked_timeout fs/io_uring.c:6300 [inline] __io_queue_sqe+0xbef/0xec0 fs/io_uring.c:6354 io_submit_sqe fs/io_uring.c:6534 [inline] io_submit_sqes+0x2bbd/0x7c50 fs/io_uring.c:6660 __do_sys_io_uring_enter fs/io_uring.c:9240 [inline] __se_sys_io_uring_enter+0x256/0x1d60 fs/io_uring.c:9182 io_link_timeout_fn() should put only one reference of the linked timeout request, however in case of racing with the master request's completion first io_req_complete() puts one and then io_put_req_deferred() is called. Cc: stable@vger.kernel.org # 5.12+ Fixes: 9ae1f8dd372e0 ("io_uring: fix inconsistent lock state") Reported-by: syzbot+a2910119328ce8e7996f@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/ff51018ff29de5ffa76f09273ef48cb24c720368.1620417627.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f46acbbeed57c..9ac5e278a91e6 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -6363,10 +6363,10 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) if (prev) { io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME); io_put_req_deferred(prev, 1); + io_put_req_deferred(req, 1); } else { io_req_complete_post(req, -ETIME, 0); } - io_put_req_deferred(req, 1); return HRTIMER_NORESTART; } -- GitLab From c1b55029493879f5bd585ff79f326e71f0bc05e3 Mon Sep 17 00:00:00 2001 From: Daniel Cordova A Date: Fri, 7 May 2021 12:31:16 -0500 Subject: [PATCH 0129/3804] ALSA: hda: fixup headset for ASUS GU502 laptop The GU502 requires a few steps to make headset i/o works properly: pincfg, verbs to unmute headphone out and callback to toggle output between speakers and headphone using jack. Signed-off-by: Daniel Cordova A Cc: Link: https://lore.kernel.org/r/20210507173116.12043-1-danesc87@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 62 +++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index a5f3e78ec04e7..b4b71609dff11 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6254,6 +6254,35 @@ static void alc294_fixup_gx502_hp(struct hda_codec *codec, } } +static void alc294_gu502_toggle_output(struct hda_codec *codec, + struct hda_jack_callback *cb) +{ + /* Windows sets 0x10 to 0x8420 for Node 0x20 which is + * responsible from changes between speakers and headphones + */ + if (snd_hda_jack_detect_state(codec, 0x21) == HDA_JACK_PRESENT) + alc_write_coef_idx(codec, 0x10, 0x8420); + else + alc_write_coef_idx(codec, 0x10, 0x0a20); +} + +static void alc294_fixup_gu502_hp(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (!is_jack_detectable(codec, 0x21)) + return; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + snd_hda_jack_detect_enable_callback(codec, 0x21, + alc294_gu502_toggle_output); + break; + case HDA_FIXUP_ACT_INIT: + alc294_gu502_toggle_output(codec, NULL); + break; + } +} + static void alc285_fixup_hp_gpio_amp_init(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -6471,6 +6500,9 @@ enum { ALC294_FIXUP_ASUS_GX502_HP, ALC294_FIXUP_ASUS_GX502_PINS, ALC294_FIXUP_ASUS_GX502_VERBS, + ALC294_FIXUP_ASUS_GU502_HP, + ALC294_FIXUP_ASUS_GU502_PINS, + ALC294_FIXUP_ASUS_GU502_VERBS, ALC285_FIXUP_HP_GPIO_LED, ALC285_FIXUP_HP_MUTE_LED, ALC236_FIXUP_HP_GPIO_LED, @@ -7712,6 +7744,35 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc294_fixup_gx502_hp, }, + [ALC294_FIXUP_ASUS_GU502_PINS] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a11050 }, /* rear HP mic */ + { 0x1a, 0x01a11830 }, /* rear external mic */ + { 0x21, 0x012110f0 }, /* rear HP out */ + { } + }, + .chained = true, + .chain_id = ALC294_FIXUP_ASUS_GU502_VERBS + }, + [ALC294_FIXUP_ASUS_GU502_VERBS] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* set 0x15 to HP-OUT ctrl */ + { 0x15, AC_VERB_SET_PIN_WIDGET_CONTROL, 0xc0 }, + /* unmute the 0x15 amp */ + { 0x15, AC_VERB_SET_AMP_GAIN_MUTE, 0xb000 }, + /* set 0x1b to HP-OUT */ + { 0x1b, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x24 }, + { } + }, + .chained = true, + .chain_id = ALC294_FIXUP_ASUS_GU502_HP + }, + [ALC294_FIXUP_ASUS_GU502_HP] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc294_fixup_gu502_hp, + }, [ALC294_FIXUP_ASUS_COEF_1B] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -8256,6 +8317,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502), + SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), -- GitLab From 4eff124347191d1548eb4e14e20e77513dcbd0fe Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 9 May 2021 12:11:02 +0300 Subject: [PATCH 0130/3804] openrisc: mm/init.c: remove unused memblock_region variable in map_ram() Kernel test robot reports: cppcheck possible warnings: (new ones prefixed by >>, may not real problems) >> arch/openrisc/mm/init.c:125:10: warning: Uninitialized variable: region [uninitvar] region->base, region->base + region->size); ^ Replace usage of memblock_region fields with 'start' and 'end' variables that are initialized in for_each_mem_range() and remove the declaration of region. Fixes: b10d6bca8720 ("arch, drivers: replace for_each_membock() with for_each_mem_range()") Reported-by: kernel test robot Signed-off-by: Mike Rapoport Signed-off-by: Stafford Horne --- arch/openrisc/mm/init.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index bf9b2310fc936..f3fa02b8838af 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -75,7 +75,6 @@ static void __init map_ram(void) /* These mark extents of read-only kernel pages... * ...from vmlinux.lds.S */ - struct memblock_region *region; v = PAGE_OFFSET; @@ -121,7 +120,7 @@ static void __init map_ram(void) } printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__, - region->base, region->base + region->size); + start, end); } } -- GitLab From 371dcaee1ade4b1eefd541ae6ee048b5ce15b37c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sun, 9 May 2021 12:11:03 +0300 Subject: [PATCH 0131/3804] openrisc: mm/init.c: remove unused variable 'end' in paging_init() A build with W=1 enabled produces the following warning: CC arch/openrisc/mm/init.o arch/openrisc/mm/init.c: In function 'paging_init': arch/openrisc/mm/init.c:131:16: warning: variable 'end' set but not used [-Wunused-but-set-variable] 131 | unsigned long end; | ^~~ Remove the unused variable 'end'. Signed-off-by: Mike Rapoport Signed-off-by: Stafford Horne --- arch/openrisc/mm/init.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c index f3fa02b8838af..6e38ec96cab89 100644 --- a/arch/openrisc/mm/init.c +++ b/arch/openrisc/mm/init.c @@ -128,7 +128,6 @@ void __init paging_init(void) { extern void tlb_init(void); - unsigned long end; int i; printk(KERN_INFO "Setting up paging and PTEs.\n"); @@ -144,8 +143,6 @@ void __init paging_init(void) */ current_pgd[smp_processor_id()] = init_mm.pgd; - end = (unsigned long)__va(max_low_pfn * PAGE_SIZE); - map_ram(); zone_sizes_init(); -- GitLab From e759959fe3b8313c81d6200be44cb8a644d845ea Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Tue, 27 Apr 2021 06:16:34 -0500 Subject: [PATCH 0132/3804] x86/sev-es: Rename sev-es.{ch} to sev.{ch} SEV-SNP builds upon the SEV-ES functionality while adding new hardware protection. Version 2 of the GHCB specification adds new NAE events that are SEV-SNP specific. Rename the sev-es.{ch} to sev.{ch} so that all SEV* functionality can be consolidated in one place. Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Acked-by: Joerg Roedel Link: https://lkml.kernel.org/r/20210427111636.1207-2-brijesh.singh@amd.com --- arch/x86/boot/compressed/Makefile | 6 +++--- arch/x86/boot/compressed/{sev-es.c => sev.c} | 4 ++-- arch/x86/include/asm/{sev-es.h => sev.h} | 0 arch/x86/kernel/Makefile | 6 +++--- arch/x86/kernel/head64.c | 2 +- arch/x86/kernel/nmi.c | 2 +- arch/x86/kernel/{sev-es-shared.c => sev-shared.c} | 0 arch/x86/kernel/{sev-es.c => sev.c} | 4 ++-- arch/x86/mm/extable.c | 2 +- arch/x86/platform/efi/efi_64.c | 2 +- arch/x86/realmode/init.c | 2 +- 11 files changed, 15 insertions(+), 15 deletions(-) rename arch/x86/boot/compressed/{sev-es.c => sev.c} (98%) rename arch/x86/include/asm/{sev-es.h => sev.h} (100%) rename arch/x86/kernel/{sev-es-shared.c => sev-shared.c} (100%) rename arch/x86/kernel/{sev-es.c => sev.c} (99%) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 6e5522aebbbd4..2a2975236c9e3 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -48,10 +48,10 @@ KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no) KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h KBUILD_CFLAGS += $(CLANG_FLAGS) -# sev-es.c indirectly inludes inat-table.h which is generated during +# sev.c indirectly inludes inat-table.h which is generated during # compilation and stored in $(objtree). Add the directory to the includes so # that the compiler finds it even with out-of-tree builds (make O=/some/path). -CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/ +CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/ KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ GCOV_PROFILE := n @@ -93,7 +93,7 @@ ifdef CONFIG_X86_64 vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o vmlinux-objs-y += $(obj)/mem_encrypt.o vmlinux-objs-y += $(obj)/pgtable_64.o - vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev-es.o + vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o endif vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev.c similarity index 98% rename from arch/x86/boot/compressed/sev-es.c rename to arch/x86/boot/compressed/sev.c index 82041bd380e56..670e998fe9306 100644 --- a/arch/x86/boot/compressed/sev-es.c +++ b/arch/x86/boot/compressed/sev.c @@ -13,7 +13,7 @@ #include "misc.h" #include -#include +#include #include #include #include @@ -117,7 +117,7 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, #include "../../lib/insn.c" /* Include code for early handlers */ -#include "../../kernel/sev-es-shared.c" +#include "../../kernel/sev-shared.c" static bool early_setup_sev_es(void) { diff --git a/arch/x86/include/asm/sev-es.h b/arch/x86/include/asm/sev.h similarity index 100% rename from arch/x86/include/asm/sev-es.h rename to arch/x86/include/asm/sev.h diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0704c2a94272c..0f66682ac02a6 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -20,7 +20,7 @@ CFLAGS_REMOVE_kvmclock.o = -pg CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg CFLAGS_REMOVE_head64.o = -pg -CFLAGS_REMOVE_sev-es.o = -pg +CFLAGS_REMOVE_sev.o = -pg endif KASAN_SANITIZE_head$(BITS).o := n @@ -28,7 +28,7 @@ KASAN_SANITIZE_dumpstack.o := n KASAN_SANITIZE_dumpstack_$(BITS).o := n KASAN_SANITIZE_stacktrace.o := n KASAN_SANITIZE_paravirt.o := n -KASAN_SANITIZE_sev-es.o := n +KASAN_SANITIZE_sev.o := n # With some compiler versions the generated code results in boot hangs, caused # by several compilation units. To be safe, disable all instrumentation. @@ -148,7 +148,7 @@ obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o -obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev-es.o +obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 18be44163a50f..de01903c37355 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include /* * Manage page tables very early on. diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 2ef961cf4cfc5..4bce802d25fb1 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #define CREATE_TRACE_POINTS #include diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-shared.c similarity index 100% rename from arch/x86/kernel/sev-es-shared.c rename to arch/x86/kernel/sev-shared.c diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev.c similarity index 99% rename from arch/x86/kernel/sev-es.c rename to arch/x86/kernel/sev.c index 73873b0078380..9578c82832aa2 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include @@ -459,7 +459,7 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt } /* Include code shared with pre-decompression boot stage */ -#include "sev-es-shared.c" +#include "sev-shared.c" void noinstr __sev_es_nmi_complete(void) { diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index b93d6cd08a7ff..121921b2927cb 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index df7b5477fc4f2..7515e78ef8983 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include /* * We allocate runtime services regions top-down, starting from -4G, i.e. diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 1be71ef5e4c4e..2e1c1bec0f9e7 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include struct real_mode_header *real_mode_header; u32 *trampoline_cr4_features; -- GitLab From b81fc74d53d1248de6db3136dd6b29e5d5528021 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Tue, 27 Apr 2021 06:16:35 -0500 Subject: [PATCH 0133/3804] x86/sev: Move GHCB MSR protocol and NAE definitions in a common header The guest and the hypervisor contain separate macros to get and set the GHCB MSR protocol and NAE event fields. Consolidate the GHCB protocol definitions and helper macros in one place. Leave the supported protocol version define in separate files to keep the guest and hypervisor flexibility to support different GHCB version in the same release. There is no functional change intended. Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Acked-by: Joerg Roedel Link: https://lkml.kernel.org/r/20210427111636.1207-3-brijesh.singh@amd.com --- arch/x86/include/asm/sev-common.h | 62 +++++++++++++++++++++++++++++++ arch/x86/include/asm/sev.h | 30 ++------------- arch/x86/kernel/sev-shared.c | 20 +++++----- arch/x86/kvm/svm/svm.h | 38 ++----------------- 4 files changed, 80 insertions(+), 70 deletions(-) create mode 100644 arch/x86/include/asm/sev-common.h diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h new file mode 100644 index 0000000000000..629c3df243f03 --- /dev/null +++ b/arch/x86/include/asm/sev-common.h @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * AMD SEV header common between the guest and the hypervisor. + * + * Author: Brijesh Singh + */ + +#ifndef __ASM_X86_SEV_COMMON_H +#define __ASM_X86_SEV_COMMON_H + +#define GHCB_MSR_INFO_POS 0 +#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1) + +#define GHCB_MSR_SEV_INFO_RESP 0x001 +#define GHCB_MSR_SEV_INFO_REQ 0x002 +#define GHCB_MSR_VER_MAX_POS 48 +#define GHCB_MSR_VER_MAX_MASK 0xffff +#define GHCB_MSR_VER_MIN_POS 32 +#define GHCB_MSR_VER_MIN_MASK 0xffff +#define GHCB_MSR_CBIT_POS 24 +#define GHCB_MSR_CBIT_MASK 0xff +#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ + ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \ + (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \ + (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \ + GHCB_MSR_SEV_INFO_RESP) +#define GHCB_MSR_INFO(v) ((v) & 0xfffUL) +#define GHCB_MSR_PROTO_MAX(v) (((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK) +#define GHCB_MSR_PROTO_MIN(v) (((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK) + +#define GHCB_MSR_CPUID_REQ 0x004 +#define GHCB_MSR_CPUID_RESP 0x005 +#define GHCB_MSR_CPUID_FUNC_POS 32 +#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff +#define GHCB_MSR_CPUID_VALUE_POS 32 +#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff +#define GHCB_MSR_CPUID_REG_POS 30 +#define GHCB_MSR_CPUID_REG_MASK 0x3 +#define GHCB_CPUID_REQ_EAX 0 +#define GHCB_CPUID_REQ_EBX 1 +#define GHCB_CPUID_REQ_ECX 2 +#define GHCB_CPUID_REQ_EDX 3 +#define GHCB_CPUID_REQ(fn, reg) \ + (GHCB_MSR_CPUID_REQ | \ + (((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \ + (((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS)) + +#define GHCB_MSR_TERM_REQ 0x100 +#define GHCB_MSR_TERM_REASON_SET_POS 12 +#define GHCB_MSR_TERM_REASON_SET_MASK 0xf +#define GHCB_MSR_TERM_REASON_POS 16 +#define GHCB_MSR_TERM_REASON_MASK 0xff +#define GHCB_SEV_TERM_REASON(reason_set, reason_val) \ + (((((u64)reason_set) & GHCB_MSR_TERM_REASON_SET_MASK) << GHCB_MSR_TERM_REASON_SET_POS) | \ + ((((u64)reason_val) & GHCB_MSR_TERM_REASON_MASK) << GHCB_MSR_TERM_REASON_POS)) + +#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0 +#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1 + +#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK) + +#endif diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index cf1d957c70919..fa5cd05d3b5be 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -10,34 +10,12 @@ #include #include +#include -#define GHCB_SEV_INFO 0x001UL -#define GHCB_SEV_INFO_REQ 0x002UL -#define GHCB_INFO(v) ((v) & 0xfffUL) -#define GHCB_PROTO_MAX(v) (((v) >> 48) & 0xffffUL) -#define GHCB_PROTO_MIN(v) (((v) >> 32) & 0xffffUL) -#define GHCB_PROTO_OUR 0x0001UL -#define GHCB_SEV_CPUID_REQ 0x004UL -#define GHCB_CPUID_REQ_EAX 0 -#define GHCB_CPUID_REQ_EBX 1 -#define GHCB_CPUID_REQ_ECX 2 -#define GHCB_CPUID_REQ_EDX 3 -#define GHCB_CPUID_REQ(fn, reg) (GHCB_SEV_CPUID_REQ | \ - (((unsigned long)reg & 3) << 30) | \ - (((unsigned long)fn) << 32)) +#define GHCB_PROTO_OUR 0x0001UL +#define GHCB_PROTOCOL_MAX 1ULL +#define GHCB_DEFAULT_USAGE 0ULL -#define GHCB_PROTOCOL_MAX 0x0001UL -#define GHCB_DEFAULT_USAGE 0x0000UL - -#define GHCB_SEV_CPUID_RESP 0x005UL -#define GHCB_SEV_TERMINATE 0x100UL -#define GHCB_SEV_TERMINATE_REASON(reason_set, reason_val) \ - (((((u64)reason_set) & 0x7) << 12) | \ - ((((u64)reason_val) & 0xff) << 16)) -#define GHCB_SEV_ES_REASON_GENERAL_REQUEST 0 -#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1 - -#define GHCB_SEV_GHCB_RESP_CODE(v) ((v) & 0xfff) #define VMGEXIT() { asm volatile("rep; vmmcall\n\r"); } enum es_result { diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 0aa9f13efd572..6ec8b3bfd76eb 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -26,13 +26,13 @@ static bool __init sev_es_check_cpu_features(void) static void __noreturn sev_es_terminate(unsigned int reason) { - u64 val = GHCB_SEV_TERMINATE; + u64 val = GHCB_MSR_TERM_REQ; /* * Tell the hypervisor what went wrong - only reason-set 0 is * currently supported. */ - val |= GHCB_SEV_TERMINATE_REASON(0, reason); + val |= GHCB_SEV_TERM_REASON(0, reason); /* Request Guest Termination from Hypvervisor */ sev_es_wr_ghcb_msr(val); @@ -47,15 +47,15 @@ static bool sev_es_negotiate_protocol(void) u64 val; /* Do the GHCB protocol version negotiation */ - sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ); + sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_INFO(val) != GHCB_SEV_INFO) + if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP) return false; - if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR || - GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR) + if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTO_OUR || + GHCB_MSR_PROTO_MIN(val) > GHCB_PROTO_OUR) return false; return true; @@ -153,28 +153,28 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->ax = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->bx = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->cx = val >> 32; sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX)); VMGEXIT(); val = sev_es_rd_ghcb_msr(); - if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP) + if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP) goto fail; regs->dx = val >> 32; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 84b3133c2251d..42f8a7b9048fb 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -20,6 +20,7 @@ #include #include +#include #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT) @@ -525,40 +526,9 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu); /* sev.c */ -#define GHCB_VERSION_MAX 1ULL -#define GHCB_VERSION_MIN 1ULL - -#define GHCB_MSR_INFO_POS 0 -#define GHCB_MSR_INFO_MASK (BIT_ULL(12) - 1) - -#define GHCB_MSR_SEV_INFO_RESP 0x001 -#define GHCB_MSR_SEV_INFO_REQ 0x002 -#define GHCB_MSR_VER_MAX_POS 48 -#define GHCB_MSR_VER_MAX_MASK 0xffff -#define GHCB_MSR_VER_MIN_POS 32 -#define GHCB_MSR_VER_MIN_MASK 0xffff -#define GHCB_MSR_CBIT_POS 24 -#define GHCB_MSR_CBIT_MASK 0xff -#define GHCB_MSR_SEV_INFO(_max, _min, _cbit) \ - ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) | \ - (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) | \ - (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) | \ - GHCB_MSR_SEV_INFO_RESP) - -#define GHCB_MSR_CPUID_REQ 0x004 -#define GHCB_MSR_CPUID_RESP 0x005 -#define GHCB_MSR_CPUID_FUNC_POS 32 -#define GHCB_MSR_CPUID_FUNC_MASK 0xffffffff -#define GHCB_MSR_CPUID_VALUE_POS 32 -#define GHCB_MSR_CPUID_VALUE_MASK 0xffffffff -#define GHCB_MSR_CPUID_REG_POS 30 -#define GHCB_MSR_CPUID_REG_MASK 0x3 - -#define GHCB_MSR_TERM_REQ 0x100 -#define GHCB_MSR_TERM_REASON_SET_POS 12 -#define GHCB_MSR_TERM_REASON_SET_MASK 0xf -#define GHCB_MSR_TERM_REASON_POS 16 -#define GHCB_MSR_TERM_REASON_MASK 0xff +#define GHCB_VERSION_MAX 1ULL +#define GHCB_VERSION_MIN 1ULL + extern unsigned int max_sev_asid; -- GitLab From 059e5c321a65657877924256ea8ad9c0df257b45 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Tue, 27 Apr 2021 06:16:36 -0500 Subject: [PATCH 0134/3804] x86/msr: Rename MSR_K8_SYSCFG to MSR_AMD64_SYSCFG The SYSCFG MSR continued being updated beyond the K8 family; drop the K8 name from it. Suggested-by: Borislav Petkov Signed-off-by: Brijesh Singh Signed-off-by: Borislav Petkov Acked-by: Joerg Roedel Link: https://lkml.kernel.org/r/20210427111636.1207-4-brijesh.singh@amd.com --- Documentation/virt/kvm/amd-memory-encryption.rst | 2 +- Documentation/x86/amd-memory-encryption.rst | 6 +++--- arch/x86/include/asm/msr-index.h | 6 +++--- arch/x86/kernel/cpu/amd.c | 4 ++-- arch/x86/kernel/cpu/mtrr/cleanup.c | 2 +- arch/x86/kernel/cpu/mtrr/generic.c | 4 ++-- arch/x86/kernel/mmconf-fam10h_64.c | 2 +- arch/x86/kvm/svm/svm.c | 4 ++-- arch/x86/kvm/x86.c | 2 +- arch/x86/mm/mem_encrypt_identity.c | 6 +++--- arch/x86/pci/amd_bus.c | 2 +- arch/x86/realmode/rm/trampoline_64.S | 4 ++-- drivers/edac/amd64_edac.c | 2 +- tools/arch/x86/include/asm/msr-index.h | 6 +++--- 14 files changed, 26 insertions(+), 26 deletions(-) diff --git a/Documentation/virt/kvm/amd-memory-encryption.rst b/Documentation/virt/kvm/amd-memory-encryption.rst index 5ec8a1902e15a..5c081c8c7164a 100644 --- a/Documentation/virt/kvm/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/amd-memory-encryption.rst @@ -22,7 +22,7 @@ to SEV:: [ecx]: Bits[31:0] Number of encrypted guests supported simultaneously -If support for SEV is present, MSR 0xc001_0010 (MSR_K8_SYSCFG) and MSR 0xc001_0015 +If support for SEV is present, MSR 0xc001_0010 (MSR_AMD64_SYSCFG) and MSR 0xc001_0015 (MSR_K7_HWCR) can be used to determine if it can be enabled:: 0xc001_0010: diff --git a/Documentation/x86/amd-memory-encryption.rst b/Documentation/x86/amd-memory-encryption.rst index c48d452d07189..a1940ebe7be50 100644 --- a/Documentation/x86/amd-memory-encryption.rst +++ b/Documentation/x86/amd-memory-encryption.rst @@ -53,7 +53,7 @@ CPUID function 0x8000001f reports information related to SME:: system physical addresses, not guest physical addresses) -If support for SME is present, MSR 0xc00100010 (MSR_K8_SYSCFG) can be used to +If support for SME is present, MSR 0xc00100010 (MSR_AMD64_SYSCFG) can be used to determine if SME is enabled and/or to enable memory encryption:: 0xc0010010: @@ -79,7 +79,7 @@ The state of SME in the Linux kernel can be documented as follows: The CPU supports SME (determined through CPUID instruction). - Enabled: - Supported and bit 23 of MSR_K8_SYSCFG is set. + Supported and bit 23 of MSR_AMD64_SYSCFG is set. - Active: Supported, Enabled and the Linux kernel is actively applying @@ -89,7 +89,7 @@ The state of SME in the Linux kernel can be documented as follows: SME can also be enabled and activated in the BIOS. If SME is enabled and activated in the BIOS, then all memory accesses will be encrypted and it will not be necessary to activate the Linux memory encryption support. If the BIOS -merely enables SME (sets bit 23 of the MSR_K8_SYSCFG), then Linux can activate +merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG), then Linux can activate memory encryption by default (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) or by supplying mem_encrypt=on on the kernel command line. However, if BIOS does not enable SME, then Linux will not be able to activate memory encryption, even diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 742d89a00721d..211ba3375ee96 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -537,9 +537,9 @@ /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K8_TOP_MEM2 0xc001001d -#define MSR_K8_SYSCFG 0xc0010010 -#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 -#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG 0xc0010010 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) #define MSR_K8_INT_PENDING_MSG 0xc0010055 /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2d11384dc9ab4..0adb0341cd7c5 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -593,8 +593,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) */ if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) { /* Check if memory encryption is enabled */ - rdmsrl(MSR_K8_SYSCFG, msr); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + rdmsrl(MSR_AMD64_SYSCFG, msr); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) goto clear_all; /* diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 0c3b372318b70..b5f43049fa5f7 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -836,7 +836,7 @@ int __init amd_special_default_mtrr(void) if (boot_cpu_data.x86 < 0xf) return 0; /* In case some hypervisor doesn't pass SYSCFG through: */ - if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) + if (rdmsr_safe(MSR_AMD64_SYSCFG, &l, &h) < 0) return 0; /* * Memory between 4GB and top of mem is forced WB by this magic bit. diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index b90f3f437765c..558108296f3cf 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -53,13 +53,13 @@ static inline void k8_check_syscfg_dram_mod_en(void) (boot_cpu_data.x86 >= 0x0f))) return; - rdmsr(MSR_K8_SYSCFG, lo, hi); + rdmsr(MSR_AMD64_SYSCFG, lo, hi); if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) { pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]" " not cleared by BIOS, clearing this bit\n", smp_processor_id()); lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY; - mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi); + mtrr_wrmsr(MSR_AMD64_SYSCFG, lo, hi); } } diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index b5cb49e57df85..c94dec6a18345 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -95,7 +95,7 @@ static void get_fam10h_pci_mmconf_base(void) return; /* SYS_CFG */ - address = MSR_K8_SYSCFG; + address = MSR_AMD64_SYSCFG; rdmsrl(address, val); /* TOP_MEM2 is not enabled? */ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index b649f92287a2e..433e8e4fb3a65 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -858,8 +858,8 @@ static __init void svm_adjust_mmio_mask(void) return; /* If memory encryption is not enabled, use existing mask */ - rdmsrl(MSR_K8_SYSCFG, msr); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + rdmsrl(MSR_AMD64_SYSCFG, msr); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) return; enc_bit = cpuid_ebx(0x8000001f) & 0x3f; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6eda2834fc05e..853c40e893352 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3402,7 +3402,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_LASTBRANCHTOIP: case MSR_IA32_LASTINTFROMIP: case MSR_IA32_LASTINTTOIP: - case MSR_K8_SYSCFG: + case MSR_AMD64_SYSCFG: case MSR_K8_TSEG_ADDR: case MSR_K8_TSEG_MASK: case MSR_VM_HSAVE_PA: diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 04aba7e80a362..a9639f663d25f 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -529,7 +529,7 @@ void __init sme_enable(struct boot_params *bp) /* * No SME if Hypervisor bit is set. This check is here to * prevent a guest from trying to enable SME. For running as a - * KVM guest the MSR_K8_SYSCFG will be sufficient, but there + * KVM guest the MSR_AMD64_SYSCFG will be sufficient, but there * might be other hypervisors which emulate that MSR as non-zero * or even pass it through to the guest. * A malicious hypervisor can still trick a guest into this @@ -542,8 +542,8 @@ void __init sme_enable(struct boot_params *bp) return; /* For SME, check the SYSCFG MSR */ - msr = __rdmsr(MSR_K8_SYSCFG); - if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT)) + msr = __rdmsr(MSR_AMD64_SYSCFG); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) return; } else { /* SEV state cannot be controlled by a command line option */ diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index ae744b6a07856..dd40d3fea74e4 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -284,7 +284,7 @@ static int __init early_root_info_init(void) /* need to take out [4G, TOM2) for RAM*/ /* SYS_CFG */ - address = MSR_K8_SYSCFG; + address = MSR_AMD64_SYSCFG; rdmsrl(address, val); /* TOP_MEM2 is enabled? */ if (val & (1<<21)) { diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index 84c5d1b33d100..cc8391f86cdb6 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S @@ -123,9 +123,9 @@ SYM_CODE_START(startup_32) */ btl $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags jnc .Ldone - movl $MSR_K8_SYSCFG, %ecx + movl $MSR_AMD64_SYSCFG, %ecx rdmsr - bts $MSR_K8_SYSCFG_MEM_ENCRYPT_BIT, %eax + bts $MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT, %eax jc .Ldone /* diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 9fa4dfc6ebee6..f0d8f60acee10 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3083,7 +3083,7 @@ static void read_mc_regs(struct amd64_pvt *pvt) edac_dbg(0, " TOP_MEM: 0x%016llx\n", pvt->top_mem); /* Check first whether TOP_MEM2 is enabled: */ - rdmsrl(MSR_K8_SYSCFG, msr_val); + rdmsrl(MSR_AMD64_SYSCFG, msr_val); if (msr_val & BIT(21)) { rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2); edac_dbg(0, " TOP_MEM2: 0x%016llx\n", pvt->top_mem2); diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 45029354e0a8b..c60b09e7602f4 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -533,9 +533,9 @@ /* K8 MSRs */ #define MSR_K8_TOP_MEM1 0xc001001a #define MSR_K8_TOP_MEM2 0xc001001d -#define MSR_K8_SYSCFG 0xc0010010 -#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 -#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_AMD64_SYSCFG 0xc0010010 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_AMD64_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT) #define MSR_K8_INT_PENDING_MSG 0xc0010055 /* C1E active bits in int pending message */ #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 -- GitLab From 970655aa9b42461f8394e4457307005bdeee14d9 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 23 Apr 2021 07:40:38 +0200 Subject: [PATCH 0135/3804] xen/gntdev: fix gntdev_mmap() error exit path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit d3eeb1d77c5d0af ("xen/gntdev: use mmu_interval_notifier_insert") introduced an error in gntdev_mmap(): in case the call of mmu_interval_notifier_insert_locked() fails the exit path should not call mmu_interval_notifier_remove(), as this might result in NULL dereferences. One reason for failure is e.g. a signal pending for the running process. Fixes: d3eeb1d77c5d0af ("xen/gntdev: use mmu_interval_notifier_insert") Cc: stable@vger.kernel.org Reported-by: Marek Marczykowski-Górecki Tested-by: Marek Marczykowski-Górecki Signed-off-by: Juergen Gross Reviewed-by: Luca Fancellu Link: https://lore.kernel.org/r/20210423054038.26696-1-jgross@suse.com Signed-off-by: Juergen Gross --- drivers/xen/gntdev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index f01d58c7a042e..a3e7be96527d7 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -1017,8 +1017,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma) err = mmu_interval_notifier_insert_locked( &map->notifier, vma->vm_mm, vma->vm_start, vma->vm_end - vma->vm_start, &gntdev_mmu_ops); - if (err) + if (err) { + map->vma = NULL; goto out_unlock_put; + } } mutex_unlock(&priv->lock); -- GitLab From dbc03e81586fc33e4945263fd6e09e22eb4b980f Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 8 May 2021 10:19:13 +0800 Subject: [PATCH 0136/3804] xen/unpopulated-alloc: fix error return code in fill_list() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: a4574f63edc6 ("mm/memremap_pages: convert to 'struct range'") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20210508021913.1727-1-thunder.leizhen@huawei.com Signed-off-by: Juergen Gross --- drivers/xen/unpopulated-alloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c index e64e6befc63b7..87e6b7db892f5 100644 --- a/drivers/xen/unpopulated-alloc.c +++ b/drivers/xen/unpopulated-alloc.c @@ -39,8 +39,10 @@ static int fill_list(unsigned int nr_pages) } pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL); - if (!pgmap) + if (!pgmap) { + ret = -ENOMEM; goto err_pgmap; + } pgmap->type = MEMORY_DEVICE_GENERIC; pgmap->range = (struct range) { -- GitLab From c5a80540e425a5f9a82b0f3163e3b6a4331f33bc Mon Sep 17 00:00:00 2001 From: Dominik Andreas Schorpp Date: Thu, 22 Apr 2021 09:58:52 +0200 Subject: [PATCH 0137/3804] USB: serial: ftdi_sio: add IDs for IDS GmbH Products Add the IDS GmbH Vendor ID and the Product IDs for SI31A (2xRS232) and CM31A (LoRaWAN Modem). Signed-off-by: Dominik Andreas Schorpp Signed-off-by: Juergen Borleis Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 3 +++ drivers/usb/serial/ftdi_sio_ids.h | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 6f2659e59b2ee..369ef140df78a 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1034,6 +1034,9 @@ static const struct usb_device_id id_table_combined[] = { /* Sienna devices */ { USB_DEVICE(FTDI_VID, FTDI_SIENNA_PID) }, { USB_DEVICE(ECHELON_VID, ECHELON_U20_PID) }, + /* IDS GmbH devices */ + { USB_DEVICE(IDS_VID, IDS_SI31A_PID) }, + { USB_DEVICE(IDS_VID, IDS_CM31A_PID) }, /* U-Blox devices */ { USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ZED_PID) }, { USB_DEVICE(UBLOX_VID, UBLOX_C099F9P_ODIN_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 3d47c6d72256e..d854e04a4286e 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1567,6 +1567,13 @@ #define UNJO_VID 0x22B7 #define UNJO_ISODEBUG_V1_PID 0x150D +/* + * IDS GmbH + */ +#define IDS_VID 0x2CAF +#define IDS_SI31A_PID 0x13A2 +#define IDS_CM31A_PID 0x13A3 + /* * U-Blox products (http://www.u-blox.com). */ -- GitLab From e467714f822b5d167a7fb03d34af91b5b6af1827 Mon Sep 17 00:00:00 2001 From: Daniele Palmas Date: Wed, 28 Apr 2021 09:26:34 +0200 Subject: [PATCH 0138/3804] USB: serial: option: add Telit LE910-S1 compositions 0x7010, 0x7011 Add support for the following Telit LE910-S1 compositions: 0x7010: rndis, tty, tty, tty 0x7011: ecm, tty, tty, tty Signed-off-by: Daniele Palmas Link: https://lore.kernel.org/r/20210428072634.5091-1-dnlplm@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 3e79a543d3e77..7608584ef4fe7 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1240,6 +1240,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1901, 0xff), /* Telit LN940 (MBIM) */ .driver_info = NCTRL(0) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7010, 0xff), /* Telit LE910-S1 (RNDIS) */ + .driver_info = NCTRL(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7011, 0xff), /* Telit LE910-S1 (ECM) */ + .driver_info = NCTRL(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x9010), /* Telit SBL FN980 flashing device */ .driver_info = NCTRL(0) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ -- GitLab From 89b1a3d811e6f8065d6ae8a25e7682329b4a31e2 Mon Sep 17 00:00:00 2001 From: Sean MacLennan Date: Sat, 1 May 2021 20:40:45 -0400 Subject: [PATCH 0139/3804] USB: serial: ti_usb_3410_5052: add startech.com device id This adds support for the Startech.com generic serial to USB converter. It seems to be a bone stock TI_3410. I have been using this patch for years. Signed-off-by: Sean MacLennan Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/ti_usb_3410_5052.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index caa46ac23db90..310db5abea9d8 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -37,6 +37,7 @@ /* Vendor and product ids */ #define TI_VENDOR_ID 0x0451 #define IBM_VENDOR_ID 0x04b3 +#define STARTECH_VENDOR_ID 0x14b0 #define TI_3410_PRODUCT_ID 0x3410 #define IBM_4543_PRODUCT_ID 0x4543 #define IBM_454B_PRODUCT_ID 0x454b @@ -370,6 +371,7 @@ static const struct usb_device_id ti_id_table_3410[] = { { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1131_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1150_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1151_PRODUCT_ID) }, + { USB_DEVICE(STARTECH_VENDOR_ID, TI_3410_PRODUCT_ID) }, { } /* terminator */ }; @@ -408,6 +410,7 @@ static const struct usb_device_id ti_id_table_combined[] = { { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1131_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1150_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1151_PRODUCT_ID) }, + { USB_DEVICE(STARTECH_VENDOR_ID, TI_3410_PRODUCT_ID) }, { } /* terminator */ }; -- GitLab From f8e8c1b2f782e7391e8a1c25648ce756e2a7d481 Mon Sep 17 00:00:00 2001 From: Zolton Jheng Date: Mon, 10 May 2021 10:32:00 +0800 Subject: [PATCH 0140/3804] USB: serial: pl2303: add device id for ADLINK ND-6530 GC This adds the device id for the ADLINK ND-6530 which is a PL2303GC based device. Signed-off-by: Zolton Jheng Cc: stable@vger.kernel.org Signed-off-by: Johan Hovold --- drivers/usb/serial/pl2303.c | 1 + drivers/usb/serial/pl2303.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index fd773d252691b..940050c314822 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -113,6 +113,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(SONY_VENDOR_ID, SONY_QN3USB_PRODUCT_ID) }, { USB_DEVICE(SANWA_VENDOR_ID, SANWA_PRODUCT_ID) }, { USB_DEVICE(ADLINK_VENDOR_ID, ADLINK_ND6530_PRODUCT_ID) }, + { USB_DEVICE(ADLINK_VENDOR_ID, ADLINK_ND6530GC_PRODUCT_ID) }, { USB_DEVICE(SMART_VENDOR_ID, SMART_PRODUCT_ID) }, { USB_DEVICE(AT_VENDOR_ID, AT_VTKIT3_PRODUCT_ID) }, { } /* Terminating entry */ diff --git a/drivers/usb/serial/pl2303.h b/drivers/usb/serial/pl2303.h index 0f681ddbfd288..6097ee8fccb25 100644 --- a/drivers/usb/serial/pl2303.h +++ b/drivers/usb/serial/pl2303.h @@ -158,6 +158,7 @@ /* ADLINK ND-6530 RS232,RS485 and RS422 adapter */ #define ADLINK_VENDOR_ID 0x0b63 #define ADLINK_ND6530_PRODUCT_ID 0x6530 +#define ADLINK_ND6530GC_PRODUCT_ID 0x653a /* SMART USB Serial Adapter */ #define SMART_VENDOR_ID 0x0b8c -- GitLab From 0c6c2d3615efb7c292573f2e6c886929a2b2da6c Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 28 Apr 2021 13:12:31 +0100 Subject: [PATCH 0141/3804] arm64: Generate cpucaps.h The arm64 code allocates an internal constant to every CPU feature it can detect, distinct from the public hwcap numbers we use to expose some features to userspace. Currently this is maintained manually which is an irritating source of conflicts when working on new features, to avoid this replace the header with a simple text file listing the names we've assigned and sort it to minimise conflicts. As part of doing this we also do the Kbuild hookup required to hook up an arch tools directory and to generate header files in there. This will result in a renumbering and reordering of the existing constants, since they are all internal only the values should not be important. The reordering will impact the order in which some steps in enumeration handle features but the algorithm is not intended to depend on this and I haven't seen any issues when testing. Due to the UAO cpucap having been removed in the past we end up with ARM64_NCAPS being 1 smaller than it was before. Signed-off-by: Mark Brown Reviewed-by: Mark Rutland Tested-by: Mark Rutland Link: https://lore.kernel.org/r/20210428121231.11219-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 3 ++ arch/arm64/include/asm/Kbuild | 2 + arch/arm64/include/asm/cpucaps.h | 74 -------------------------------- arch/arm64/tools/Makefile | 22 ++++++++++ arch/arm64/tools/cpucaps | 65 ++++++++++++++++++++++++++++ arch/arm64/tools/gen-cpucaps.awk | 40 +++++++++++++++++ 6 files changed, 132 insertions(+), 74 deletions(-) delete mode 100644 arch/arm64/include/asm/cpucaps.h create mode 100644 arch/arm64/tools/Makefile create mode 100644 arch/arm64/tools/cpucaps create mode 100755 arch/arm64/tools/gen-cpucaps.awk diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 7ef44478560df..b52481f0605d8 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -175,6 +175,9 @@ vdso_install: $(if $(CONFIG_COMPAT_VDSO), \ $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@) +archprepare: + $(Q)$(MAKE) $(build)=arch/arm64/tools kapi + # We use MRPROPER_FILES and CLEAN_FILES now archclean: $(Q)$(MAKE) $(clean)=$(boot) diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 07ac208edc894..26889dbfe904d 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -5,3 +5,5 @@ generic-y += qrwlock.h generic-y += qspinlock.h generic-y += set_memory.h generic-y += user.h + +generated-y += cpucaps.h diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h deleted file mode 100644 index b0c5eda0498f2..0000000000000 --- a/arch/arm64/include/asm/cpucaps.h +++ /dev/null @@ -1,74 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * arch/arm64/include/asm/cpucaps.h - * - * Copyright (C) 2016 ARM Ltd. - */ -#ifndef __ASM_CPUCAPS_H -#define __ASM_CPUCAPS_H - -#define ARM64_WORKAROUND_CLEAN_CACHE 0 -#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1 -#define ARM64_WORKAROUND_845719 2 -#define ARM64_HAS_SYSREG_GIC_CPUIF 3 -#define ARM64_HAS_PAN 4 -#define ARM64_HAS_LSE_ATOMICS 5 -#define ARM64_WORKAROUND_CAVIUM_23154 6 -#define ARM64_WORKAROUND_834220 7 -#define ARM64_HAS_NO_HW_PREFETCH 8 -#define ARM64_HAS_VIRT_HOST_EXTN 11 -#define ARM64_WORKAROUND_CAVIUM_27456 12 -#define ARM64_HAS_32BIT_EL0 13 -#define ARM64_SPECTRE_V3A 14 -#define ARM64_HAS_CNP 15 -#define ARM64_HAS_NO_FPSIMD 16 -#define ARM64_WORKAROUND_REPEAT_TLBI 17 -#define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 -#define ARM64_WORKAROUND_858921 19 -#define ARM64_WORKAROUND_CAVIUM_30115 20 -#define ARM64_HAS_DCPOP 21 -#define ARM64_SVE 22 -#define ARM64_UNMAP_KERNEL_AT_EL0 23 -#define ARM64_SPECTRE_V2 24 -#define ARM64_HAS_RAS_EXTN 25 -#define ARM64_WORKAROUND_843419 26 -#define ARM64_HAS_CACHE_IDC 27 -#define ARM64_HAS_CACHE_DIC 28 -#define ARM64_HW_DBM 29 -#define ARM64_SPECTRE_V4 30 -#define ARM64_MISMATCHED_CACHE_TYPE 31 -#define ARM64_HAS_STAGE2_FWB 32 -#define ARM64_HAS_CRC32 33 -#define ARM64_SSBS 34 -#define ARM64_WORKAROUND_1418040 35 -#define ARM64_HAS_SB 36 -#define ARM64_WORKAROUND_SPECULATIVE_AT 37 -#define ARM64_HAS_ADDRESS_AUTH_ARCH 38 -#define ARM64_HAS_ADDRESS_AUTH_IMP_DEF 39 -#define ARM64_HAS_GENERIC_AUTH_ARCH 40 -#define ARM64_HAS_GENERIC_AUTH_IMP_DEF 41 -#define ARM64_HAS_IRQ_PRIO_MASKING 42 -#define ARM64_HAS_DCPODP 43 -#define ARM64_WORKAROUND_1463225 44 -#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45 -#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46 -#define ARM64_WORKAROUND_1542419 47 -#define ARM64_HAS_E0PD 48 -#define ARM64_HAS_RNG 49 -#define ARM64_HAS_AMU_EXTN 50 -#define ARM64_HAS_ADDRESS_AUTH 51 -#define ARM64_HAS_GENERIC_AUTH 52 -#define ARM64_HAS_32BIT_EL1 53 -#define ARM64_BTI 54 -#define ARM64_HAS_ARMv8_4_TTL 55 -#define ARM64_HAS_TLB_RANGE 56 -#define ARM64_MTE 57 -#define ARM64_WORKAROUND_1508412 58 -#define ARM64_HAS_LDAPR 59 -#define ARM64_KVM_PROTECTED_MODE 60 -#define ARM64_WORKAROUND_NVIDIA_CARMEL_CNP 61 -#define ARM64_HAS_EPAN 62 - -#define ARM64_NCAPS 63 - -#endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile new file mode 100644 index 0000000000000..932b4fe5c7684 --- /dev/null +++ b/arch/arm64/tools/Makefile @@ -0,0 +1,22 @@ +# SPDX-License-Identifier: GPL-2.0 + +gen := arch/$(ARCH)/include/generated +kapi := $(gen)/asm + +kapi-hdrs-y := $(kapi)/cpucaps.h + +targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y)) + +PHONY += kapi + +kapi: $(kapi-hdrs-y) $(gen-y) + +# Create output directory if not already present +_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') + +quiet_cmd_gen_cpucaps = GEN $@ + cmd_gen_cpucaps = mkdir -p $(dir $@) && \ + $(AWK) -f $(filter-out $(PHONY),$^) > $@ + +$(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE + $(call if_changed,gen_cpucaps) diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps new file mode 100644 index 0000000000000..21fbdda7086e2 --- /dev/null +++ b/arch/arm64/tools/cpucaps @@ -0,0 +1,65 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Internal CPU capabilities constants, keep this list sorted + +BTI +HAS_32BIT_EL0 +HAS_32BIT_EL1 +HAS_ADDRESS_AUTH +HAS_ADDRESS_AUTH_ARCH +HAS_ADDRESS_AUTH_IMP_DEF +HAS_AMU_EXTN +HAS_ARMv8_4_TTL +HAS_CACHE_DIC +HAS_CACHE_IDC +HAS_CNP +HAS_CRC32 +HAS_DCPODP +HAS_DCPOP +HAS_E0PD +HAS_EPAN +HAS_GENERIC_AUTH +HAS_GENERIC_AUTH_ARCH +HAS_GENERIC_AUTH_IMP_DEF +HAS_IRQ_PRIO_MASKING +HAS_LDAPR +HAS_LSE_ATOMICS +HAS_NO_FPSIMD +HAS_NO_HW_PREFETCH +HAS_PAN +HAS_RAS_EXTN +HAS_RNG +HAS_SB +HAS_STAGE2_FWB +HAS_SYSREG_GIC_CPUIF +HAS_TLB_RANGE +HAS_VIRT_HOST_EXTN +HW_DBM +KVM_PROTECTED_MODE +MISMATCHED_CACHE_TYPE +MTE +SPECTRE_V2 +SPECTRE_V3A +SPECTRE_V4 +SSBS +SVE +UNMAP_KERNEL_AT_EL0 +WORKAROUND_834220 +WORKAROUND_843419 +WORKAROUND_845719 +WORKAROUND_858921 +WORKAROUND_1418040 +WORKAROUND_1463225 +WORKAROUND_1508412 +WORKAROUND_1542419 +WORKAROUND_CAVIUM_23154 +WORKAROUND_CAVIUM_27456 +WORKAROUND_CAVIUM_30115 +WORKAROUND_CAVIUM_TX2_219_PRFM +WORKAROUND_CAVIUM_TX2_219_TVM +WORKAROUND_CLEAN_CACHE +WORKAROUND_DEVICE_LOAD_ACQUIRE +WORKAROUND_NVIDIA_CARMEL_CNP +WORKAROUND_QCOM_FALKOR_E1003 +WORKAROUND_REPEAT_TLBI +WORKAROUND_SPECULATIVE_AT diff --git a/arch/arm64/tools/gen-cpucaps.awk b/arch/arm64/tools/gen-cpucaps.awk new file mode 100755 index 0000000000000..18737a1ce0448 --- /dev/null +++ b/arch/arm64/tools/gen-cpucaps.awk @@ -0,0 +1,40 @@ +#!/bin/awk -f +# SPDX-License-Identifier: GPL-2.0 +# gen-cpucaps.awk: arm64 cpucaps header generator +# +# Usage: awk -f gen-cpucaps.awk cpucaps.txt + +# Log an error and terminate +function fatal(msg) { + print "Error at line " NR ": " msg > "/dev/stderr" + exit 1 +} + +# skip blank lines and comment lines +/^$/ { next } +/^#/ { next } + +BEGIN { + print "#ifndef __ASM_CPUCAPS_H" + print "#define __ASM_CPUCAPS_H" + print "" + print "/* Generated file - do not edit */" + cap_num = 0 + print "" +} + +/^[vA-Z0-9_]+$/ { + printf("#define ARM64_%-30s\t%d\n", $0, cap_num++) + next +} + +END { + printf("#define ARM64_NCAPS\t\t\t\t%d\n", cap_num) + print "" + print "#endif" +} + +# Any lines not handled by previous rules are unexpected +{ + fatal("unhandled statement") +} -- GitLab From a1bed090fc56e6e24517d96bc076595544fb5317 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 7 May 2021 17:25:42 +0100 Subject: [PATCH 0142/3804] kselftest/arm64: Add missing stddef.h include to BTI tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Explicitly include stddef.h when building the BTI tests so that we have a definition of NULL, with at least some toolchains this is not done implicitly by anything else: test.c: In function ‘start’: test.c:214:25: error: ‘NULL’ undeclared (first use in this function) 214 | sigaction(SIGILL, &sa, NULL); | ^~~~ test.c:20:1: note: ‘NULL’ is defined in header ‘’; did you forget to ‘#include ’? Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20210507162542.23149-1-broonie@kernel.org Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/bti/test.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c index 656b04976ccc6..67b77ab83c20e 100644 --- a/tools/testing/selftests/arm64/bti/test.c +++ b/tools/testing/selftests/arm64/bti/test.c @@ -6,6 +6,7 @@ #include "system.h" +#include #include #include #include -- GitLab From e90812c47b958407b54d05780dc483fdc1b57a93 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 17:19:11 +0200 Subject: [PATCH 0143/3804] staging: media: rkvdec: fix pm_runtime_get_sync() usage count The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak. Reviewed-by: Ezequiel Garcia Reviewed-by: Jonathan Cameron Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/rkvdec/rkvdec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/media/rkvdec/rkvdec.c b/drivers/staging/media/rkvdec/rkvdec.c index d821661d30f38..8c17615f3a7ab 100644 --- a/drivers/staging/media/rkvdec/rkvdec.c +++ b/drivers/staging/media/rkvdec/rkvdec.c @@ -658,7 +658,7 @@ static void rkvdec_device_run(void *priv) if (WARN_ON(!desc)) return; - ret = pm_runtime_get_sync(rkvdec->dev); + ret = pm_runtime_resume_and_get(rkvdec->dev); if (ret < 0) { rkvdec_job_finish_no_pm(ctx, VB2_BUF_STATE_ERROR); return; -- GitLab From 4cba5473c5ce0f1389d316c5dc6f83a0259df5eb Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Apr 2021 10:39:47 +0200 Subject: [PATCH 0144/3804] media: venus: Rework error fail recover logic The Venus code has a sort of watchdog that attempts to recover from IP errors, implemented as a delayed work job, which calls venus_sys_error_handler(). Right now, it has several issues: 1. It assumes that PM runtime resume never fails 2. It internally runs two while() loops that also assume that PM runtime will never fail to go idle: while (pm_runtime_active(core->dev_dec) || pm_runtime_active(core->dev_enc)) msleep(10); ... while (core->pmdomains[0] && pm_runtime_active(core->pmdomains[0])) usleep_range(1000, 1500); 3. It uses an OR to merge all return codes and then report to the user 4. If the hardware never recovers, it keeps running on every 10ms, flooding the syslog with 2 messages (so, up to 200 messages per second). Rework the code, in order to prevent that, by: 1. check the return code from PM runtime resume; 2. don't let the while() loops run forever; 3. store the failed event; 4. use warn ratelimited when it fails to recover. Fixes: af2c3834c8ca ("[media] media: venus: adding core part and helper functions") Reviewed-by: Jonathan Cameron Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/qcom/venus/core.c | 60 +++++++++++++++++++----- 1 file changed, 47 insertions(+), 13 deletions(-) diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c index 54bac7ec14c50..91b15842c5558 100644 --- a/drivers/media/platform/qcom/venus/core.c +++ b/drivers/media/platform/qcom/venus/core.c @@ -78,22 +78,32 @@ static const struct hfi_core_ops venus_core_ops = { .event_notify = venus_event_notify, }; +#define RPM_WAIT_FOR_IDLE_MAX_ATTEMPTS 10 + static void venus_sys_error_handler(struct work_struct *work) { struct venus_core *core = container_of(work, struct venus_core, work.work); - int ret = 0; - - pm_runtime_get_sync(core->dev); + int ret, i, max_attempts = RPM_WAIT_FOR_IDLE_MAX_ATTEMPTS; + const char *err_msg = ""; + bool failed = false; + + ret = pm_runtime_get_sync(core->dev); + if (ret < 0) { + err_msg = "resume runtime PM"; + max_attempts = 0; + failed = true; + } hfi_core_deinit(core, true); - dev_warn(core->dev, "system error has occurred, starting recovery!\n"); - mutex_lock(&core->lock); - while (pm_runtime_active(core->dev_dec) || pm_runtime_active(core->dev_enc)) + for (i = 0; i < max_attempts; i++) { + if (!pm_runtime_active(core->dev_dec) && !pm_runtime_active(core->dev_enc)) + break; msleep(10); + } venus_shutdown(core); @@ -101,31 +111,55 @@ static void venus_sys_error_handler(struct work_struct *work) pm_runtime_put_sync(core->dev); - while (core->pmdomains[0] && pm_runtime_active(core->pmdomains[0])) + for (i = 0; i < max_attempts; i++) { + if (!core->pmdomains[0] || !pm_runtime_active(core->pmdomains[0])) + break; usleep_range(1000, 1500); + } hfi_reinit(core); - pm_runtime_get_sync(core->dev); + ret = pm_runtime_get_sync(core->dev); + if (ret < 0) { + err_msg = "resume runtime PM"; + failed = true; + } - ret |= venus_boot(core); - ret |= hfi_core_resume(core, true); + ret = venus_boot(core); + if (ret && !failed) { + err_msg = "boot Venus"; + failed = true; + } + + ret = hfi_core_resume(core, true); + if (ret && !failed) { + err_msg = "resume HFI"; + failed = true; + } enable_irq(core->irq); mutex_unlock(&core->lock); - ret |= hfi_core_init(core); + ret = hfi_core_init(core); + if (ret && !failed) { + err_msg = "init HFI"; + failed = true; + } pm_runtime_put_sync(core->dev); - if (ret) { + if (failed) { disable_irq_nosync(core->irq); - dev_warn(core->dev, "recovery failed (%d)\n", ret); + dev_warn_ratelimited(core->dev, + "System error has occurred, recovery failed to %s\n", + err_msg); schedule_delayed_work(&core->work, msecs_to_jiffies(10)); return; } + dev_warn(core->dev, "system error has occurred (recovered)\n"); + mutex_lock(&core->lock); core->sys_error = false; mutex_unlock(&core->lock); -- GitLab From 747bad54a677d8633ec14b39dfbeb859c821d7f2 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 28 Apr 2021 09:38:56 +0200 Subject: [PATCH 0145/3804] media: s5p_cec: decrement usage count if disabled There's a bug at s5p_cec_adap_enable(): if called to disable the device, it should call pm_runtime_put() instead of pm_runtime_disable(), as the goal here is to decrement the usage_count and not to disable PM runtime. Reported-by: Sylwester Nawrocki Reviewed-by: Jonathan Cameron Fixes: 1bcbf6f4b6b0 ("[media] cec: s5p-cec: Add s5p-cec driver") Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/platform/s5p/s5p_cec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/cec/platform/s5p/s5p_cec.c b/drivers/media/cec/platform/s5p/s5p_cec.c index 2a3e7ffefe0a2..3c7c4c3c798c1 100644 --- a/drivers/media/cec/platform/s5p/s5p_cec.c +++ b/drivers/media/cec/platform/s5p/s5p_cec.c @@ -51,7 +51,7 @@ static int s5p_cec_adap_enable(struct cec_adapter *adap, bool enable) } else { s5p_cec_mask_tx_interrupts(cec); s5p_cec_mask_rx_interrupts(cec); - pm_runtime_disable(cec->dev); + pm_runtime_put(cec->dev); } return 0; -- GitLab From 6005a8e955e4e451e4bf6000affaab566d4cab5e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Apr 2021 08:36:00 +0200 Subject: [PATCH 0146/3804] media: i2c: ccs-core: return the right error code at suspend If pm_runtime resume logic fails, return the error code provided by it, instead of -EAGAIN, as, depending on what caused it to fail, it may not be something that would be recovered. Fixes: cbba45d43631 ("[media] smiapp: Use runtime PM") Reviewed-by: Jonathan Cameron Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/ccs/ccs-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c index 9dc3f45da3dcd..b05f409014b2f 100644 --- a/drivers/media/i2c/ccs/ccs-core.c +++ b/drivers/media/i2c/ccs/ccs-core.c @@ -3093,7 +3093,7 @@ static int __maybe_unused ccs_suspend(struct device *dev) if (rval < 0) { pm_runtime_put_noidle(dev); - return -EAGAIN; + return rval; } if (sensor->streaming) -- GitLab From da3a1858c3a37c09446e1470c48352897d59d11b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 17:19:11 +0200 Subject: [PATCH 0147/3804] media: i2c: ccs-core: fix pm_runtime_get_sync() usage count The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. There is a bug at ccs_pm_get_init(): when this function returns an error, the stream is not started, and RPM usage_count should not be incremented. However, if the calls to v4l2_ctrl_handler_setup() return errors, it will be kept incremented. At ccs_suspend() the best is to replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter automatically, in the case of errors. Fixes: 96e3a6b92f23 ("media: smiapp: Avoid maintaining power state information") Cc: stable@vger.kernel.org Acked-by: Sakari Ailus Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/ccs/ccs-core.c | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/drivers/media/i2c/ccs/ccs-core.c b/drivers/media/i2c/ccs/ccs-core.c index b05f409014b2f..4a848ac2d2cd2 100644 --- a/drivers/media/i2c/ccs/ccs-core.c +++ b/drivers/media/i2c/ccs/ccs-core.c @@ -1880,21 +1880,33 @@ static int ccs_pm_get_init(struct ccs_sensor *sensor) struct i2c_client *client = v4l2_get_subdevdata(&sensor->src->sd); int rval; + /* + * It can't use pm_runtime_resume_and_get() here, as the driver + * relies at the returned value to detect if the device was already + * active or not. + */ rval = pm_runtime_get_sync(&client->dev); - if (rval < 0) { - pm_runtime_put_noidle(&client->dev); + if (rval < 0) + goto error; - return rval; - } else if (!rval) { - rval = v4l2_ctrl_handler_setup(&sensor->pixel_array-> - ctrl_handler); - if (rval) - return rval; + /* Device was already active, so don't set controls */ + if (rval == 1) + return 0; - return v4l2_ctrl_handler_setup(&sensor->src->ctrl_handler); - } + /* Restore V4L2 controls to the previously suspended device */ + rval = v4l2_ctrl_handler_setup(&sensor->pixel_array->ctrl_handler); + if (rval) + goto error; + rval = v4l2_ctrl_handler_setup(&sensor->src->ctrl_handler); + if (rval) + goto error; + + /* Keep PM runtime usage_count incremented on success */ return 0; +error: + pm_runtime_put(&client->dev); + return rval; } static int ccs_set_stream(struct v4l2_subdev *subdev, int enable) -- GitLab From 62c90446868b439929cb04395f04a709a64ae04b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 17:19:13 +0200 Subject: [PATCH 0148/3804] media: i2c: imx334: fix the pm runtime get logic The PM runtime get logic is currently broken, as it checks if ret is zero instead of checking if it is an error code, as reported by Dan Carpenter. While here, use the pm_runtime_resume_and_get() as added by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") added pm_runtime_resume_and_get() in order to automatically handle dev->power.usage_count decrement on errors. As a bonus, such function always return zero on success. It should also be noticed that a fail of pm_runtime_get_sync() would potentially result in a spurious runtime_suspend(), instead of using pm_runtime_put_noidle(). Reported-by: Dan Carpenter Reviewed-by: Daniele Alessandrelli Reviewed-by: Jonathan Cameron Signed-off-by: Mauro Carvalho Chehab --- drivers/media/i2c/imx334.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/media/i2c/imx334.c b/drivers/media/i2c/imx334.c index 047aa7658d217..23f28606e570f 100644 --- a/drivers/media/i2c/imx334.c +++ b/drivers/media/i2c/imx334.c @@ -717,9 +717,9 @@ static int imx334_set_stream(struct v4l2_subdev *sd, int enable) } if (enable) { - ret = pm_runtime_get_sync(imx334->dev); - if (ret) - goto error_power_off; + ret = pm_runtime_resume_and_get(imx334->dev); + if (ret < 0) + goto error_unlock; ret = imx334_start_streaming(imx334); if (ret) @@ -737,6 +737,7 @@ static int imx334_set_stream(struct v4l2_subdev *sd, int enable) error_power_off: pm_runtime_put(imx334->dev); +error_unlock: mutex_unlock(&imx334->mutex); return ret; -- GitLab From e6695c89b3d4595f60c9fe40e0938e085d15dd20 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 27 Apr 2021 11:43:54 +0200 Subject: [PATCH 0149/3804] media: exynos-gsc: don't resume at remove time Calling pm_runtime_get_sync() at driver's removal time is not needed, as this will resume PM runtime. Also, the PM runtime code at pm_runtime_disable() already calls it, if it detects the need. So, change the logic in order to disable PM runtime earlier. Reviewed-by: Jonathan Cameron Reviewed-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/exynos-gsc/gsc-core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/media/platform/exynos-gsc/gsc-core.c b/drivers/media/platform/exynos-gsc/gsc-core.c index 9f41c2e7097a6..f49f3322f835a 100644 --- a/drivers/media/platform/exynos-gsc/gsc-core.c +++ b/drivers/media/platform/exynos-gsc/gsc-core.c @@ -1210,18 +1210,19 @@ static int gsc_remove(struct platform_device *pdev) struct gsc_dev *gsc = platform_get_drvdata(pdev); int i; - pm_runtime_get_sync(&pdev->dev); - gsc_unregister_m2m_device(gsc); v4l2_device_unregister(&gsc->v4l2_dev); vb2_dma_contig_clear_max_seg_size(&pdev->dev); - for (i = 0; i < gsc->num_clocks; i++) - clk_disable_unprepare(gsc->clock[i]); - pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); + if (!pm_runtime_status_suspended(&pdev->dev)) + for (i = 0; i < gsc->num_clocks; i++) + clk_disable_unprepare(gsc->clock[i]); + + pm_runtime_set_suspended(&pdev->dev); + dev_dbg(&pdev->dev, "%s driver unloaded\n", pdev->name); return 0; } -- GitLab From dd97908ee35096356fb4111bb77d5f94bcfe337d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 16:47:42 +0200 Subject: [PATCH 0150/3804] media: atmel: properly get pm_runtime There are several issues in the way the atmel driver handles pm_runtime_get_sync(): - it doesn't check return codes; - it doesn't properly decrement the usage_count on all places; - it starts streaming even if pm_runtime_get_sync() fails. - while it tries to get pm_runtime at the clock enable logic, it doesn't check if the operation was suceeded. Replace all occurrences of it to use the new kAPI: pm_runtime_resume_and_get(), which ensures that, if the return code is not negative, the usage_count was incremented. With that, add additional checks when this is called, in order to ensure that errors will be properly addressed. Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/atmel/atmel-isc-base.c | 30 ++++++++++++++----- drivers/media/platform/atmel/atmel-isi.c | 19 +++++++++--- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/drivers/media/platform/atmel/atmel-isc-base.c b/drivers/media/platform/atmel/atmel-isc-base.c index fe3ec8d0eaee3..ce8e1351fa532 100644 --- a/drivers/media/platform/atmel/atmel-isc-base.c +++ b/drivers/media/platform/atmel/atmel-isc-base.c @@ -294,9 +294,13 @@ static int isc_wait_clk_stable(struct clk_hw *hw) static int isc_clk_prepare(struct clk_hw *hw) { struct isc_clk *isc_clk = to_isc_clk(hw); + int ret; - if (isc_clk->id == ISC_ISPCK) - pm_runtime_get_sync(isc_clk->dev); + if (isc_clk->id == ISC_ISPCK) { + ret = pm_runtime_resume_and_get(isc_clk->dev); + if (ret < 0) + return ret; + } return isc_wait_clk_stable(hw); } @@ -353,9 +357,13 @@ static int isc_clk_is_enabled(struct clk_hw *hw) { struct isc_clk *isc_clk = to_isc_clk(hw); u32 status; + int ret; - if (isc_clk->id == ISC_ISPCK) - pm_runtime_get_sync(isc_clk->dev); + if (isc_clk->id == ISC_ISPCK) { + ret = pm_runtime_resume_and_get(isc_clk->dev); + if (ret < 0) + return 0; + } regmap_read(isc_clk->regmap, ISC_CLKSR, &status); @@ -807,7 +815,12 @@ static int isc_start_streaming(struct vb2_queue *vq, unsigned int count) goto err_start_stream; } - pm_runtime_get_sync(isc->dev); + ret = pm_runtime_resume_and_get(isc->dev); + if (ret < 0) { + v4l2_err(&isc->v4l2_dev, "RPM resume failed in subdev %d\n", + ret); + goto err_pm_get; + } ret = isc_configure(isc); if (unlikely(ret)) @@ -838,7 +851,7 @@ static int isc_start_streaming(struct vb2_queue *vq, unsigned int count) err_configure: pm_runtime_put_sync(isc->dev); - +err_pm_get: v4l2_subdev_call(isc->current_subdev->sd, video, s_stream, 0); err_start_stream: @@ -1809,6 +1822,7 @@ static void isc_awb_work(struct work_struct *w) u32 baysel; unsigned long flags; u32 min, max; + int ret; /* streaming is not active anymore */ if (isc->stop) @@ -1831,7 +1845,9 @@ static void isc_awb_work(struct work_struct *w) ctrls->hist_id = hist_id; baysel = isc->config.sd_format->cfa_baycfg << ISC_HIS_CFG_BAYSEL_SHIFT; - pm_runtime_get_sync(isc->dev); + ret = pm_runtime_resume_and_get(isc->dev); + if (ret < 0) + return; /* * only update if we have all the required histograms and controls diff --git a/drivers/media/platform/atmel/atmel-isi.c b/drivers/media/platform/atmel/atmel-isi.c index e392b3efe3633..5b1dd358f2e63 100644 --- a/drivers/media/platform/atmel/atmel-isi.c +++ b/drivers/media/platform/atmel/atmel-isi.c @@ -422,7 +422,9 @@ static int start_streaming(struct vb2_queue *vq, unsigned int count) struct frame_buffer *buf, *node; int ret; - pm_runtime_get_sync(isi->dev); + ret = pm_runtime_resume_and_get(isi->dev); + if (ret < 0) + return ret; /* Enable stream on the sub device */ ret = v4l2_subdev_call(isi->entity.subdev, video, s_stream, 1); @@ -782,9 +784,10 @@ static int isi_enum_frameintervals(struct file *file, void *fh, return 0; } -static void isi_camera_set_bus_param(struct atmel_isi *isi) +static int isi_camera_set_bus_param(struct atmel_isi *isi) { u32 cfg1 = 0; + int ret; /* set bus param for ISI */ if (isi->pdata.hsync_act_low) @@ -801,12 +804,16 @@ static void isi_camera_set_bus_param(struct atmel_isi *isi) cfg1 |= ISI_CFG1_THMASK_BEATS_16; /* Enable PM and peripheral clock before operate isi registers */ - pm_runtime_get_sync(isi->dev); + ret = pm_runtime_resume_and_get(isi->dev); + if (ret < 0) + return ret; isi_writel(isi, ISI_CTRL, ISI_CTRL_DIS); isi_writel(isi, ISI_CFG1, cfg1); pm_runtime_put(isi->dev); + + return 0; } /* -----------------------------------------------------------------------*/ @@ -1085,7 +1092,11 @@ static int isi_graph_notify_complete(struct v4l2_async_notifier *notifier) dev_err(isi->dev, "No supported mediabus format found\n"); return ret; } - isi_camera_set_bus_param(isi); + ret = isi_camera_set_bus_param(isi); + if (ret) { + dev_err(isi->dev, "Can't wake up device\n"); + return ret; + } ret = isi_set_default_fmt(isi); if (ret) { -- GitLab From 892bb6ecead9b834ba7ad1d07513e9eba1baa3a4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 28 Apr 2021 08:27:55 +0200 Subject: [PATCH 0151/3804] media: hantro: do a PM resume earlier The device_run() first enables the clock and then tries to resume PM runtime, checking for errors. Well, if for some reason the pm_runtime can not resume, it would be better to detect it beforehand. So, change the order inside device_run(). Reviewed-by: Ezequiel Garcia Fixes: 775fec69008d ("media: add Rockchip VPU JPEG encoder driver") Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/hantro/hantro_drv.c | 33 +++++++++++++++-------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c index 595e82a827287..eea2009fa17bd 100644 --- a/drivers/staging/media/hantro/hantro_drv.c +++ b/drivers/staging/media/hantro/hantro_drv.c @@ -56,16 +56,12 @@ dma_addr_t hantro_get_ref(struct hantro_ctx *ctx, u64 ts) return hantro_get_dec_buf_addr(ctx, buf); } -static void hantro_job_finish(struct hantro_dev *vpu, - struct hantro_ctx *ctx, - enum vb2_buffer_state result) +static void hantro_job_finish_no_pm(struct hantro_dev *vpu, + struct hantro_ctx *ctx, + enum vb2_buffer_state result) { struct vb2_v4l2_buffer *src, *dst; - pm_runtime_mark_last_busy(vpu->dev); - pm_runtime_put_autosuspend(vpu->dev); - clk_bulk_disable(vpu->variant->num_clocks, vpu->clocks); - src = v4l2_m2m_next_src_buf(ctx->fh.m2m_ctx); dst = v4l2_m2m_next_dst_buf(ctx->fh.m2m_ctx); @@ -81,6 +77,18 @@ static void hantro_job_finish(struct hantro_dev *vpu, result); } +static void hantro_job_finish(struct hantro_dev *vpu, + struct hantro_ctx *ctx, + enum vb2_buffer_state result) +{ + pm_runtime_mark_last_busy(vpu->dev); + pm_runtime_put_autosuspend(vpu->dev); + + clk_bulk_disable(vpu->variant->num_clocks, vpu->clocks); + + hantro_job_finish_no_pm(vpu, ctx, result); +} + void hantro_irq_done(struct hantro_dev *vpu, enum vb2_buffer_state result) { @@ -152,12 +160,15 @@ static void device_run(void *priv) src = hantro_get_src_buf(ctx); dst = hantro_get_dst_buf(ctx); + ret = pm_runtime_get_sync(ctx->dev->dev); + if (ret < 0) { + pm_runtime_put_noidle(ctx->dev->dev); + goto err_cancel_job; + } + ret = clk_bulk_enable(ctx->dev->variant->num_clocks, ctx->dev->clocks); if (ret) goto err_cancel_job; - ret = pm_runtime_get_sync(ctx->dev->dev); - if (ret < 0) - goto err_cancel_job; v4l2_m2m_buf_copy_metadata(src, dst, true); @@ -165,7 +176,7 @@ static void device_run(void *priv) return; err_cancel_job: - hantro_job_finish(ctx->dev, ctx, VB2_BUF_STATE_ERROR); + hantro_job_finish_no_pm(ctx->dev, ctx, VB2_BUF_STATE_ERROR); } static struct v4l2_m2m_ops vpu_m2m_ops = { -- GitLab From e7c617cab7a522fba5b20f9033ee98565b6f3546 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 16:54:25 +0200 Subject: [PATCH 0152/3804] media: marvel-ccic: fix some issues when getting pm_runtime Calling pm_runtime_get_sync() is bad, since even when it returns an error, pm_runtime_put*() should be called. So, use instead pm_runtime_resume_and_get(). While here, ensure that the error condition will be checked during clock enable an media open() calls. Reviewed-by: Jonathan Cameron Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/marvell-ccic/mcam-core.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/marvell-ccic/mcam-core.c b/drivers/media/platform/marvell-ccic/mcam-core.c index 141bf5d97a044..ea87110d90738 100644 --- a/drivers/media/platform/marvell-ccic/mcam-core.c +++ b/drivers/media/platform/marvell-ccic/mcam-core.c @@ -918,6 +918,7 @@ static int mclk_enable(struct clk_hw *hw) struct mcam_camera *cam = container_of(hw, struct mcam_camera, mclk_hw); int mclk_src; int mclk_div; + int ret; /* * Clock the sensor appropriately. Controller clock should @@ -931,7 +932,9 @@ static int mclk_enable(struct clk_hw *hw) mclk_div = 2; } - pm_runtime_get_sync(cam->dev); + ret = pm_runtime_resume_and_get(cam->dev); + if (ret < 0) + return ret; clk_enable(cam->clk[0]); mcam_reg_write(cam, REG_CLKCTRL, (mclk_src << 29) | mclk_div); mcam_ctlr_power_up(cam); @@ -1611,7 +1614,9 @@ static int mcam_v4l_open(struct file *filp) ret = sensor_call(cam, core, s_power, 1); if (ret) goto out; - pm_runtime_get_sync(cam->dev); + ret = pm_runtime_resume_and_get(cam->dev); + if (ret < 0) + goto out; __mcam_cam_reset(cam); mcam_set_config_needed(cam, 1); } -- GitLab From d07bb9702cf5f5ccf3fb661e6cab54bbc33cd23f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 16:57:16 +0200 Subject: [PATCH 0153/3804] media: mdk-mdp: fix pm_runtime_get_sync() usage count The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak. While here, fix the return contition of mtk_mdp_m2m_start_streaming(), as it doesn't make any sense to return 0 if the PM runtime failed to resume. Reviewed-by: Jonathan Cameron Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c b/drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c index ace4528cdc5ef..f14779e7596e5 100644 --- a/drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c +++ b/drivers/media/platform/mtk-mdp/mtk_mdp_m2m.c @@ -391,12 +391,12 @@ static int mtk_mdp_m2m_start_streaming(struct vb2_queue *q, unsigned int count) struct mtk_mdp_ctx *ctx = q->drv_priv; int ret; - ret = pm_runtime_get_sync(&ctx->mdp_dev->pdev->dev); + ret = pm_runtime_resume_and_get(&ctx->mdp_dev->pdev->dev); if (ret < 0) - mtk_mdp_dbg(1, "[%d] pm_runtime_get_sync failed:%d", + mtk_mdp_dbg(1, "[%d] pm_runtime_resume_and_get failed:%d", ctx->id, ret); - return 0; + return ret; } static void *mtk_mdp_m2m_buf_remove(struct mtk_mdp_ctx *ctx, -- GitLab From fa9f443f7c962d072d150472e2bb77de39817a9a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 16:59:34 +0200 Subject: [PATCH 0154/3804] media: rcar_fdp1: simplify error check logic at fdp_open() Avoid some code duplication by moving the common error path logic at fdp_open(). Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/rcar_fdp1.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/media/platform/rcar_fdp1.c b/drivers/media/platform/rcar_fdp1.c index 01c1fbb97bf67..d26413fa52057 100644 --- a/drivers/media/platform/rcar_fdp1.c +++ b/drivers/media/platform/rcar_fdp1.c @@ -2117,9 +2117,7 @@ static int fdp1_open(struct file *file) if (ctx->hdl.error) { ret = ctx->hdl.error; - v4l2_ctrl_handler_free(&ctx->hdl); - kfree(ctx); - goto done; + goto error_ctx; } ctx->fh.ctrl_handler = &ctx->hdl; @@ -2133,10 +2131,7 @@ static int fdp1_open(struct file *file) if (IS_ERR(ctx->fh.m2m_ctx)) { ret = PTR_ERR(ctx->fh.m2m_ctx); - - v4l2_ctrl_handler_free(&ctx->hdl); - kfree(ctx); - goto done; + goto error_ctx; } /* Perform any power management required */ @@ -2147,6 +2142,12 @@ static int fdp1_open(struct file *file) dprintk(fdp1, "Created instance: %p, m2m_ctx: %p\n", ctx, ctx->fh.m2m_ctx); + mutex_unlock(&fdp1->dev_mutex); + return 0; + +error_ctx: + v4l2_ctrl_handler_free(&ctx->hdl); + kfree(ctx); done: mutex_unlock(&fdp1->dev_mutex); return ret; -- GitLab From 45e75a8c6fa455a5909ac04db76a4b15d6bb8368 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 16:59:34 +0200 Subject: [PATCH 0155/3804] media: rcar_fdp1: fix pm_runtime_get_sync() usage count The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak. Also, right now, the driver is ignoring any troubles when trying to do PM resume. So, add the proper error handling for the code. Reviewed-by: Jonathan Cameron Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/rcar_fdp1.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/rcar_fdp1.c b/drivers/media/platform/rcar_fdp1.c index d26413fa52057..89aac60066d91 100644 --- a/drivers/media/platform/rcar_fdp1.c +++ b/drivers/media/platform/rcar_fdp1.c @@ -2135,7 +2135,9 @@ static int fdp1_open(struct file *file) } /* Perform any power management required */ - pm_runtime_get_sync(fdp1->dev); + ret = pm_runtime_resume_and_get(fdp1->dev); + if (ret < 0) + goto error_pm; v4l2_fh_add(&ctx->fh); @@ -2145,6 +2147,8 @@ static int fdp1_open(struct file *file) mutex_unlock(&fdp1->dev_mutex); return 0; +error_pm: + v4l2_m2m_ctx_release(ctx->fh.m2m_ctx); error_ctx: v4l2_ctrl_handler_free(&ctx->hdl); kfree(ctx); @@ -2352,7 +2356,9 @@ static int fdp1_probe(struct platform_device *pdev) /* Power up the cells to read HW */ pm_runtime_enable(&pdev->dev); - pm_runtime_get_sync(fdp1->dev); + ret = pm_runtime_resume_and_get(fdp1->dev); + if (ret < 0) + goto disable_pm; hw_version = fdp1_read(fdp1, FD1_IP_INTDATA); switch (hw_version) { @@ -2381,6 +2387,9 @@ static int fdp1_probe(struct platform_device *pdev) return 0; +disable_pm: + pm_runtime_disable(fdp1->dev); + release_m2m: v4l2_m2m_release(fdp1->m2m_dev); -- GitLab From 220955ec3c84505ec6a75bea494ec61f5295ef7a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 17:03:23 +0200 Subject: [PATCH 0156/3804] media: renesas-ceu: Properly check for PM errors Right now, the driver just assumes that PM runtime resume worked, but it may fail. Well, the pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. So, using it is tricky. Let's replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") and return an error if something bad happens. This should ensure that the PM runtime usage_count will be properly decremented if an error happens at open time. Reviewed-by: Jonathan Cameron Acked-by: Jacopo Mondi Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/renesas-ceu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/renesas-ceu.c b/drivers/media/platform/renesas-ceu.c index cd137101d41ea..17f01b6e3fe0f 100644 --- a/drivers/media/platform/renesas-ceu.c +++ b/drivers/media/platform/renesas-ceu.c @@ -1099,10 +1099,10 @@ static int ceu_open(struct file *file) mutex_lock(&ceudev->mlock); /* Causes soft-reset and sensor power on on first open */ - pm_runtime_get_sync(ceudev->dev); + ret = pm_runtime_resume_and_get(ceudev->dev); mutex_unlock(&ceudev->mlock); - return 0; + return ret; } static int ceu_release(struct file *file) -- GitLab From fdc34e82c0f968ac4c157bd3d8c299ebc24c9c63 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 17:04:23 +0200 Subject: [PATCH 0157/3804] media: s5p: fix pm_runtime_get_sync() usage count The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak. While here, check if the PM runtime error was caught at s5p_cec_adap_enable(). Reviewed-by: Jonathan Cameron Reviewed-by: Sylwester Nawrocki Acked-by: Marek Szyprowski Signed-off-by: Mauro Carvalho Chehab --- drivers/media/cec/platform/s5p/s5p_cec.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/media/cec/platform/s5p/s5p_cec.c b/drivers/media/cec/platform/s5p/s5p_cec.c index 3c7c4c3c798c1..028a09a7531ef 100644 --- a/drivers/media/cec/platform/s5p/s5p_cec.c +++ b/drivers/media/cec/platform/s5p/s5p_cec.c @@ -35,10 +35,13 @@ MODULE_PARM_DESC(debug, "debug level (0-2)"); static int s5p_cec_adap_enable(struct cec_adapter *adap, bool enable) { + int ret; struct s5p_cec_dev *cec = cec_get_drvdata(adap); if (enable) { - pm_runtime_get_sync(cec->dev); + ret = pm_runtime_resume_and_get(cec->dev); + if (ret < 0) + return ret; s5p_cec_reset(cec); -- GitLab From c41e02493334985cca1a22efd5ca962ce3abb061 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 17:05:27 +0200 Subject: [PATCH 0158/3804] media: am437x: fix pm_runtime_get_sync() usage count The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak. While here, ensure that the driver will check if PM runtime resumed at vpfe_initialize_device(). Reviewed-by: Jonathan Cameron Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/am437x/am437x-vpfe.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/media/platform/am437x/am437x-vpfe.c b/drivers/media/platform/am437x/am437x-vpfe.c index 6cdc77dda0e49..1c9cb9e05fdf6 100644 --- a/drivers/media/platform/am437x/am437x-vpfe.c +++ b/drivers/media/platform/am437x/am437x-vpfe.c @@ -1021,7 +1021,9 @@ static int vpfe_initialize_device(struct vpfe_device *vpfe) if (ret) return ret; - pm_runtime_get_sync(vpfe->pdev); + ret = pm_runtime_resume_and_get(vpfe->pdev); + if (ret < 0) + return ret; vpfe_config_enable(&vpfe->ccdc, 1); @@ -2443,7 +2445,11 @@ static int vpfe_probe(struct platform_device *pdev) pm_runtime_enable(&pdev->dev); /* for now just enable it here instead of waiting for the open */ - pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); + if (ret < 0) { + vpfe_err(vpfe, "Unable to resume device.\n"); + goto probe_out_v4l2_unregister; + } vpfe_ccdc_config_defaults(ccdc); @@ -2530,6 +2536,11 @@ static int vpfe_suspend(struct device *dev) /* only do full suspend if streaming has started */ if (vb2_start_streaming_called(&vpfe->buffer_queue)) { + /* + * ignore RPM resume errors here, as it is already too late. + * A check like that should happen earlier, either at + * open() or just before start streaming. + */ pm_runtime_get_sync(dev); vpfe_config_enable(ccdc, 1); -- GitLab From 6e8b1526db164c9d4b9dacfb9bc48e365d7c4860 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 17:07:41 +0200 Subject: [PATCH 0159/3804] media: sh_vou: fix pm_runtime_get_sync() usage count The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak. While here, check if the PM runtime error was caught at open time. Reviewed-by: Jonathan Cameron Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/sh_vou.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/media/platform/sh_vou.c b/drivers/media/platform/sh_vou.c index 4ac48441f22c4..ca4310e26c49e 100644 --- a/drivers/media/platform/sh_vou.c +++ b/drivers/media/platform/sh_vou.c @@ -1133,7 +1133,11 @@ static int sh_vou_open(struct file *file) if (v4l2_fh_is_singular_file(file) && vou_dev->status == SH_VOU_INITIALISING) { /* First open */ - pm_runtime_get_sync(vou_dev->v4l2_dev.dev); + err = pm_runtime_resume_and_get(vou_dev->v4l2_dev.dev); + if (err < 0) { + v4l2_fh_release(file); + goto done_open; + } err = sh_vou_hw_init(vou_dev); if (err < 0) { pm_runtime_put(vou_dev->v4l2_dev.dev); -- GitLab From 908711f542c17fe61e5d653da1beb8e5ab5c7b50 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 17:19:09 +0200 Subject: [PATCH 0160/3804] media: mtk-vcodec: fix PM runtime get logic Currently, the driver just assumes that PM runtime logic succeded resuming the device. That may not be the case, as pm_runtime_get_sync() can fail (but keeping the usage count incremented). Replace the code to use pm_runtime_resume_and_get(), and letting it return the error code. This way, if mtk_vcodec_dec_pw_on() fails, the logic under fops_vcodec_open() will do the right thing and return an error, instead of just assuming that the device is ready to be used. Reviewed-by: Jonathan Cameron Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c | 4 +++- drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c | 8 +++++--- drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c index 147dfef1638d2..f87dc47d9e638 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_drv.c @@ -126,7 +126,9 @@ static int fops_vcodec_open(struct file *file) mtk_vcodec_dec_set_default_params(ctx); if (v4l2_fh_is_singular(&ctx->fh)) { - mtk_vcodec_dec_pw_on(&dev->pm); + ret = mtk_vcodec_dec_pw_on(&dev->pm); + if (ret < 0) + goto err_load_fw; /* * Does nothing if firmware was already loaded. */ diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c index ddee7046ce422..6038db96f71c3 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.c @@ -88,13 +88,15 @@ void mtk_vcodec_release_dec_pm(struct mtk_vcodec_dev *dev) put_device(dev->pm.larbvdec); } -void mtk_vcodec_dec_pw_on(struct mtk_vcodec_pm *pm) +int mtk_vcodec_dec_pw_on(struct mtk_vcodec_pm *pm) { int ret; - ret = pm_runtime_get_sync(pm->dev); + ret = pm_runtime_resume_and_get(pm->dev); if (ret) - mtk_v4l2_err("pm_runtime_get_sync fail %d", ret); + mtk_v4l2_err("pm_runtime_resume_and_get fail %d", ret); + + return ret; } void mtk_vcodec_dec_pw_off(struct mtk_vcodec_pm *pm) diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h index 872d8bf8cfaf3..280aeaefdb651 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_pm.h @@ -12,7 +12,7 @@ int mtk_vcodec_init_dec_pm(struct mtk_vcodec_dev *dev); void mtk_vcodec_release_dec_pm(struct mtk_vcodec_dev *dev); -void mtk_vcodec_dec_pw_on(struct mtk_vcodec_pm *pm); +int mtk_vcodec_dec_pw_on(struct mtk_vcodec_pm *pm); void mtk_vcodec_dec_pw_off(struct mtk_vcodec_pm *pm); void mtk_vcodec_dec_clock_on(struct mtk_vcodec_pm *pm); void mtk_vcodec_dec_clock_off(struct mtk_vcodec_pm *pm); -- GitLab From 10343de268d10cf07b092b8b525e12ad558ead77 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 17:19:10 +0200 Subject: [PATCH 0161/3804] media: s5p-jpeg: fix pm_runtime_get_sync() usage count The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak. As a plus, pm_runtime_resume_and_get() doesn't return positive numbers, so the return code validation can be removed. Reviewed-by: Jonathan Cameron Reviewed-by: Sylwester Nawrocki Acked-by: Andrzej Pietrasiewicz Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/s5p-jpeg/jpeg-core.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/media/platform/s5p-jpeg/jpeg-core.c b/drivers/media/platform/s5p-jpeg/jpeg-core.c index 026111505f5a5..d402e456f27df 100644 --- a/drivers/media/platform/s5p-jpeg/jpeg-core.c +++ b/drivers/media/platform/s5p-jpeg/jpeg-core.c @@ -2566,11 +2566,8 @@ static void s5p_jpeg_buf_queue(struct vb2_buffer *vb) static int s5p_jpeg_start_streaming(struct vb2_queue *q, unsigned int count) { struct s5p_jpeg_ctx *ctx = vb2_get_drv_priv(q); - int ret; - - ret = pm_runtime_get_sync(ctx->jpeg->dev); - return ret > 0 ? 0 : ret; + return pm_runtime_resume_and_get(ctx->jpeg->dev); } static void s5p_jpeg_stop_streaming(struct vb2_queue *q) -- GitLab From baa450f08d691a40fcc29ba8ce40e02613736ac7 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 17:19:10 +0200 Subject: [PATCH 0162/3804] media: sti/delta: use pm_runtime_resume_and_get() Commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") added pm_runtime_resume_and_get() in order to automatically handle dev->power.usage_count decrement on errors. Use the new API, in order to cleanup the error check logic. Reviewed-by: Jonathan Cameron Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/sti/delta/delta-v4l2.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/media/platform/sti/delta/delta-v4l2.c b/drivers/media/platform/sti/delta/delta-v4l2.c index c691b3d81549d..064a00a3084a0 100644 --- a/drivers/media/platform/sti/delta/delta-v4l2.c +++ b/drivers/media/platform/sti/delta/delta-v4l2.c @@ -954,10 +954,8 @@ static void delta_run_work(struct work_struct *work) /* enable the hardware */ if (!dec->pm) { ret = delta_get_sync(ctx); - if (ret) { - delta_put_autosuspend(ctx); + if (ret) goto err; - } } /* decode this access unit */ @@ -1277,9 +1275,9 @@ int delta_get_sync(struct delta_ctx *ctx) int ret = 0; /* enable the hardware */ - ret = pm_runtime_get_sync(delta->dev); + ret = pm_runtime_resume_and_get(delta->dev); if (ret < 0) { - dev_err(delta->dev, "%s pm_runtime_get_sync failed (%d)\n", + dev_err(delta->dev, "%s pm_runtime_resume_and_get failed (%d)\n", __func__, ret); return ret; } -- GitLab From 9c298f82d8392f799a0595f50076afa1d91e9092 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 17:19:10 +0200 Subject: [PATCH 0163/3804] media: sunxi: fix pm_runtime_get_sync() usage count The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak. Reviewed-by: Jonathan Cameron Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/sunxi/sun8i-rotate/sun8i_rotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/platform/sunxi/sun8i-rotate/sun8i_rotate.c b/drivers/media/platform/sunxi/sun8i-rotate/sun8i_rotate.c index 3f81dd17755cb..fbcca59a0517c 100644 --- a/drivers/media/platform/sunxi/sun8i-rotate/sun8i_rotate.c +++ b/drivers/media/platform/sunxi/sun8i-rotate/sun8i_rotate.c @@ -494,7 +494,7 @@ static int rotate_start_streaming(struct vb2_queue *vq, unsigned int count) struct device *dev = ctx->dev->dev; int ret; - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) { dev_err(dev, "Failed to enable module\n"); -- GitLab From c44eac5b72e23c31eefc0e10a71d9650036b8341 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 17:19:21 +0200 Subject: [PATCH 0164/3804] media: sti/bdisp: fix pm_runtime_get_sync() usage count The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. The bdisp_start_streaming() doesn't take it into account, which would unbalance PM usage counter at bdisp_stop_streaming(). The logic at bdisp_probe() is correct, but the best is to use the same call along the driver. So, replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak. Reviewed-by: Jonathan Cameron Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/sti/bdisp/bdisp-v4l2.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c index 060ca85f64d5d..85288da9d2ae6 100644 --- a/drivers/media/platform/sti/bdisp/bdisp-v4l2.c +++ b/drivers/media/platform/sti/bdisp/bdisp-v4l2.c @@ -499,7 +499,7 @@ static int bdisp_start_streaming(struct vb2_queue *q, unsigned int count) { struct bdisp_ctx *ctx = q->drv_priv; struct vb2_v4l2_buffer *buf; - int ret = pm_runtime_get_sync(ctx->bdisp_dev->dev); + int ret = pm_runtime_resume_and_get(ctx->bdisp_dev->dev); if (ret < 0) { dev_err(ctx->bdisp_dev->dev, "failed to set runtime PM\n"); @@ -1364,10 +1364,10 @@ static int bdisp_probe(struct platform_device *pdev) /* Power management */ pm_runtime_enable(dev); - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) { dev_err(dev, "failed to set PM\n"); - goto err_pm; + goto err_remove; } /* Filters */ @@ -1395,6 +1395,7 @@ err_filter: bdisp_hw_free_filters(bdisp->dev); err_pm: pm_runtime_put(dev); +err_remove: bdisp_debugfs_remove(bdisp); v4l2_device_unregister(&bdisp->v4l2_dev); err_clk: -- GitLab From 59f96244af9403ddf4810ec5c0fbe8920857634e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 17:19:17 +0200 Subject: [PATCH 0165/3804] media: exynos4-is: fix pm_runtime_get_sync() usage count The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. On some places, this is ok, but on others the usage count ended being unbalanced on failures. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak. As a bonus, such function always return zero on success. So, some code can be simplified. Reviewed-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/exynos4-is/fimc-capture.c | 6 ++---- drivers/media/platform/exynos4-is/fimc-is.c | 4 ++-- drivers/media/platform/exynos4-is/fimc-isp-video.c | 3 +-- drivers/media/platform/exynos4-is/fimc-isp.c | 7 +++---- drivers/media/platform/exynos4-is/fimc-lite.c | 5 +++-- drivers/media/platform/exynos4-is/fimc-m2m.c | 5 +---- drivers/media/platform/exynos4-is/media-dev.c | 9 +++------ drivers/media/platform/exynos4-is/mipi-csis.c | 10 ++++------ 8 files changed, 19 insertions(+), 30 deletions(-) diff --git a/drivers/media/platform/exynos4-is/fimc-capture.c b/drivers/media/platform/exynos4-is/fimc-capture.c index 13c838d3f9473..0da36443173c1 100644 --- a/drivers/media/platform/exynos4-is/fimc-capture.c +++ b/drivers/media/platform/exynos4-is/fimc-capture.c @@ -478,11 +478,9 @@ static int fimc_capture_open(struct file *file) goto unlock; set_bit(ST_CAPT_BUSY, &fimc->state); - ret = pm_runtime_get_sync(&fimc->pdev->dev); - if (ret < 0) { - pm_runtime_put_sync(&fimc->pdev->dev); + ret = pm_runtime_resume_and_get(&fimc->pdev->dev); + if (ret < 0) goto unlock; - } ret = v4l2_fh_open(file); if (ret) { diff --git a/drivers/media/platform/exynos4-is/fimc-is.c b/drivers/media/platform/exynos4-is/fimc-is.c index 972d9601d2360..1b24f5bfc4af4 100644 --- a/drivers/media/platform/exynos4-is/fimc-is.c +++ b/drivers/media/platform/exynos4-is/fimc-is.c @@ -828,9 +828,9 @@ static int fimc_is_probe(struct platform_device *pdev) goto err_irq; } - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) - goto err_pm; + goto err_irq; vb2_dma_contig_set_max_seg_size(dev, DMA_BIT_MASK(32)); diff --git a/drivers/media/platform/exynos4-is/fimc-isp-video.c b/drivers/media/platform/exynos4-is/fimc-isp-video.c index 612b9872afc87..8d9dc597deaaf 100644 --- a/drivers/media/platform/exynos4-is/fimc-isp-video.c +++ b/drivers/media/platform/exynos4-is/fimc-isp-video.c @@ -275,7 +275,7 @@ static int isp_video_open(struct file *file) if (ret < 0) goto unlock; - ret = pm_runtime_get_sync(&isp->pdev->dev); + ret = pm_runtime_resume_and_get(&isp->pdev->dev); if (ret < 0) goto rel_fh; @@ -293,7 +293,6 @@ static int isp_video_open(struct file *file) if (!ret) goto unlock; rel_fh: - pm_runtime_put_noidle(&isp->pdev->dev); v4l2_fh_release(file); unlock: mutex_unlock(&isp->video_lock); diff --git a/drivers/media/platform/exynos4-is/fimc-isp.c b/drivers/media/platform/exynos4-is/fimc-isp.c index a77c49b185115..74b49d30901ed 100644 --- a/drivers/media/platform/exynos4-is/fimc-isp.c +++ b/drivers/media/platform/exynos4-is/fimc-isp.c @@ -304,11 +304,10 @@ static int fimc_isp_subdev_s_power(struct v4l2_subdev *sd, int on) pr_debug("on: %d\n", on); if (on) { - ret = pm_runtime_get_sync(&is->pdev->dev); - if (ret < 0) { - pm_runtime_put(&is->pdev->dev); + ret = pm_runtime_resume_and_get(&is->pdev->dev); + if (ret < 0) return ret; - } + set_bit(IS_ST_PWR_ON, &is->state); ret = fimc_is_start_firmware(is); diff --git a/drivers/media/platform/exynos4-is/fimc-lite.c b/drivers/media/platform/exynos4-is/fimc-lite.c index fe20af3a7178a..4d8b18078ff37 100644 --- a/drivers/media/platform/exynos4-is/fimc-lite.c +++ b/drivers/media/platform/exynos4-is/fimc-lite.c @@ -469,9 +469,9 @@ static int fimc_lite_open(struct file *file) } set_bit(ST_FLITE_IN_USE, &fimc->state); - ret = pm_runtime_get_sync(&fimc->pdev->dev); + ret = pm_runtime_resume_and_get(&fimc->pdev->dev); if (ret < 0) - goto err_pm; + goto err_in_use; ret = v4l2_fh_open(file); if (ret < 0) @@ -499,6 +499,7 @@ static int fimc_lite_open(struct file *file) v4l2_fh_release(file); err_pm: pm_runtime_put_sync(&fimc->pdev->dev); +err_in_use: clear_bit(ST_FLITE_IN_USE, &fimc->state); unlock: mutex_unlock(&fimc->lock); diff --git a/drivers/media/platform/exynos4-is/fimc-m2m.c b/drivers/media/platform/exynos4-is/fimc-m2m.c index c9704a147e5cf..df8e2aa454d8f 100644 --- a/drivers/media/platform/exynos4-is/fimc-m2m.c +++ b/drivers/media/platform/exynos4-is/fimc-m2m.c @@ -73,17 +73,14 @@ static void fimc_m2m_shutdown(struct fimc_ctx *ctx) static int start_streaming(struct vb2_queue *q, unsigned int count) { struct fimc_ctx *ctx = q->drv_priv; - int ret; - ret = pm_runtime_get_sync(&ctx->fimc_dev->pdev->dev); - return ret > 0 ? 0 : ret; + return pm_runtime_resume_and_get(&ctx->fimc_dev->pdev->dev); } static void stop_streaming(struct vb2_queue *q) { struct fimc_ctx *ctx = q->drv_priv; - fimc_m2m_shutdown(ctx); fimc_m2m_job_finish(ctx, VB2_BUF_STATE_ERROR); pm_runtime_put(&ctx->fimc_dev->pdev->dev); diff --git a/drivers/media/platform/exynos4-is/media-dev.c b/drivers/media/platform/exynos4-is/media-dev.c index 13d192ba4aa6e..e025178db06c3 100644 --- a/drivers/media/platform/exynos4-is/media-dev.c +++ b/drivers/media/platform/exynos4-is/media-dev.c @@ -512,11 +512,9 @@ static int fimc_md_register_sensor_entities(struct fimc_md *fmd) if (!fmd->pmf) return -ENXIO; - ret = pm_runtime_get_sync(fmd->pmf); - if (ret < 0) { - pm_runtime_put(fmd->pmf); + ret = pm_runtime_resume_and_get(fmd->pmf); + if (ret < 0) return ret; - } fmd->num_sensors = 0; @@ -1291,8 +1289,7 @@ static int cam_clk_prepare(struct clk_hw *hw) if (camclk->fmd->pmf == NULL) return -ENODEV; - ret = pm_runtime_get_sync(camclk->fmd->pmf); - return ret < 0 ? ret : 0; + return pm_runtime_resume_and_get(camclk->fmd->pmf); } static void cam_clk_unprepare(struct clk_hw *hw) diff --git a/drivers/media/platform/exynos4-is/mipi-csis.c b/drivers/media/platform/exynos4-is/mipi-csis.c index 1aac167abb175..ebf39c8568943 100644 --- a/drivers/media/platform/exynos4-is/mipi-csis.c +++ b/drivers/media/platform/exynos4-is/mipi-csis.c @@ -494,7 +494,7 @@ static int s5pcsis_s_power(struct v4l2_subdev *sd, int on) struct device *dev = &state->pdev->dev; if (on) - return pm_runtime_get_sync(dev); + return pm_runtime_resume_and_get(dev); return pm_runtime_put_sync(dev); } @@ -509,11 +509,9 @@ static int s5pcsis_s_stream(struct v4l2_subdev *sd, int enable) if (enable) { s5pcsis_clear_counters(state); - ret = pm_runtime_get_sync(&state->pdev->dev); - if (ret && ret != 1) { - pm_runtime_put_noidle(&state->pdev->dev); + ret = pm_runtime_resume_and_get(&state->pdev->dev); + if (ret < 0) return ret; - } } mutex_lock(&state->lock); @@ -535,7 +533,7 @@ unlock: if (!enable) pm_runtime_put(&state->pdev->dev); - return ret == 1 ? 0 : ret; + return ret; } static int s5pcsis_enum_mbus_code(struct v4l2_subdev *sd, -- GitLab From 59087b66ea6730c130c57d23bd9fd139b78c1ba5 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 23 Apr 2021 17:19:18 +0200 Subject: [PATCH 0166/3804] media: exynos-gsc: fix pm_runtime_get_sync() usage count The pm_runtime_get_sync() internally increments the dev->power.usage_count without decrementing it, even on errors. Replace it by the new pm_runtime_resume_and_get(), introduced by: commit dd8088d5a896 ("PM: runtime: Add pm_runtime_resume_and_get to deal with usage counter") in order to properly decrement the usage counter, avoiding a potential PM usage counter leak. As a bonus, as pm_runtime_get_sync() always return 0 on success, the logic can be simplified. Reviewed-by: Jonathan Cameron Reviewed-by: Sylwester Nawrocki Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/exynos-gsc/gsc-m2m.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/media/platform/exynos-gsc/gsc-m2m.c b/drivers/media/platform/exynos-gsc/gsc-m2m.c index 27a3c92c73bce..f1cf847d1cc2d 100644 --- a/drivers/media/platform/exynos-gsc/gsc-m2m.c +++ b/drivers/media/platform/exynos-gsc/gsc-m2m.c @@ -56,10 +56,8 @@ static void __gsc_m2m_job_abort(struct gsc_ctx *ctx) static int gsc_m2m_start_streaming(struct vb2_queue *q, unsigned int count) { struct gsc_ctx *ctx = q->drv_priv; - int ret; - ret = pm_runtime_get_sync(&ctx->gsc_dev->pdev->dev); - return ret > 0 ? 0 : ret; + return pm_runtime_resume_and_get(&ctx->gsc_dev->pdev->dev); } static void __gsc_m2m_cleanup_queue(struct gsc_ctx *ctx) -- GitLab From 9148cded3a0246d55e62187e219466c0c7986925 Mon Sep 17 00:00:00 2001 From: Aline Santana Cordeiro Date: Fri, 9 Apr 2021 14:24:21 +0200 Subject: [PATCH 0167/3804] media: staging: media: hantro: Align line break to the open parenthesis in file hantro_hw.h Aligns line break with the remaining function arguments to the open parenthesis. Issue found by checkpatch. Signed-off-by: Aline Santana Cordeiro Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/hantro/hantro_hw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h index 83b3e42b63a3b..0e34ae545f661 100644 --- a/drivers/staging/media/hantro/hantro_hw.h +++ b/drivers/staging/media/hantro/hantro_hw.h @@ -219,7 +219,7 @@ hantro_h264_mv_size(unsigned int width, unsigned int height) void hantro_g1_mpeg2_dec_run(struct hantro_ctx *ctx); void rk3399_vpu_mpeg2_dec_run(struct hantro_ctx *ctx); void hantro_mpeg2_dec_copy_qtable(u8 *qtable, - const struct v4l2_ctrl_mpeg2_quantization *ctrl); + const struct v4l2_ctrl_mpeg2_quantization *ctrl); int hantro_mpeg2_dec_init(struct hantro_ctx *ctx); void hantro_mpeg2_dec_exit(struct hantro_ctx *ctx); -- GitLab From d637c5dbbfee4c0cbd2f507b627e5b29823f49da Mon Sep 17 00:00:00 2001 From: Aline Santana Cordeiro Date: Fri, 9 Apr 2021 14:24:25 +0200 Subject: [PATCH 0168/3804] media: staging: media: hantro: Align line break to the open parenthesis in file hantro_mpeg2.c Aligns line break with the remaining function arguments to the open parenthesis. Issue found by checkpatch. Signed-off-by: Aline Santana Cordeiro Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/hantro/hantro_mpeg2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/media/hantro/hantro_mpeg2.c b/drivers/staging/media/hantro/hantro_mpeg2.c index 1d334e6fcd063..53a99a9988d51 100644 --- a/drivers/staging/media/hantro/hantro_mpeg2.c +++ b/drivers/staging/media/hantro/hantro_mpeg2.c @@ -19,7 +19,7 @@ static const u8 zigzag[64] = { }; void hantro_mpeg2_dec_copy_qtable(u8 *qtable, - const struct v4l2_ctrl_mpeg2_quantization *ctrl) + const struct v4l2_ctrl_mpeg2_quantization *ctrl) { int i, n; -- GitLab From d58f75de9b958ff9d996da942ccf79d7526bfde8 Mon Sep 17 00:00:00 2001 From: Aline Santana Cordeiro Date: Fri, 9 Apr 2021 21:01:08 +0200 Subject: [PATCH 0169/3804] media: staging: media: omap4iss: Align line break to the open parenthesis in file iss_video.c Aligns line break with the remaining function arguments to the open parenthesis. Issue found by checkpatch. Signed-off-by: Aline Santana Cordeiro Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/omap4iss/iss_video.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/media/omap4iss/iss_video.c b/drivers/staging/media/omap4iss/iss_video.c index 930f638f51eba..d0da083deed53 100644 --- a/drivers/staging/media/omap4iss/iss_video.c +++ b/drivers/staging/media/omap4iss/iss_video.c @@ -399,7 +399,7 @@ static void iss_video_buf_queue(struct vb2_buffer *vb) if (start) omap4iss_pipeline_set_stream(pipe, - ISS_PIPELINE_STREAM_SINGLESHOT); + ISS_PIPELINE_STREAM_SINGLESHOT); } } @@ -960,7 +960,7 @@ iss_video_streamon(struct file *file, void *fh, enum v4l2_buf_type type) unsigned long flags; ret = omap4iss_pipeline_set_stream(pipe, - ISS_PIPELINE_STREAM_CONTINUOUS); + ISS_PIPELINE_STREAM_CONTINUOUS); if (ret < 0) goto err_omap4iss_set_stream; spin_lock_irqsave(&video->qlock, flags); -- GitLab From 047d39c4a1bc197ec038008e941fa30d08d1d885 Mon Sep 17 00:00:00 2001 From: Martiros Shakhzadyan Date: Mon, 12 Apr 2021 04:35:56 +0200 Subject: [PATCH 0170/3804] media: staging: media: atomisp: Removed a superfluous else clause Fixed a coding style issue. Signed-off-by: Martiros Shakhzadyan Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/atomisp/i2c/atomisp-ov2722.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c b/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c index 1209492c1826a..912eadaffc442 100644 --- a/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c +++ b/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c @@ -774,11 +774,11 @@ static int ov2722_s_power(struct v4l2_subdev *sd, int on) if (on == 0) return power_down(sd); - else { - ret = power_up(sd); - if (!ret) - return ov2722_init(sd); - } + + ret = power_up(sd); + if (!ret) + return ov2722_init(sd); + return ret; } -- GitLab From 94dfa800dda45a0849aa493c05800b2a3557a6ee Mon Sep 17 00:00:00 2001 From: Beatriz Martins de Carvalho Date: Mon, 12 Apr 2021 15:43:01 +0200 Subject: [PATCH 0171/3804] media: staging: media: atomisp: i2c: align line break to match with open parenthesis Aligns line break with the remaining function arguments to the open parenthesis. Issue found by checkpatch in file atomisp-gc2235.c Signed-off-by: Beatriz Martins de Carvalho Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/atomisp/i2c/atomisp-gc2235.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c b/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c index 78147ffb60996..6ba4a8adff7c2 100644 --- a/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c +++ b/drivers/staging/media/atomisp/i2c/atomisp-gc2235.c @@ -171,8 +171,8 @@ static int __gc2235_buf_reg_array(struct i2c_client *client, } static int __gc2235_write_reg_is_consecutive(struct i2c_client *client, - struct gc2235_write_ctrl *ctrl, - const struct gc2235_reg *next) + struct gc2235_write_ctrl *ctrl, + const struct gc2235_reg *next) { if (ctrl->index == 0) return 1; -- GitLab From a21baa418c5b6a011f02d18d2214c28d6f3a4a47 Mon Sep 17 00:00:00 2001 From: Mitali Borkar Date: Tue, 13 Apr 2021 07:15:29 +0200 Subject: [PATCH 0172/3804] media: staging: media: intel-ipu3: remove unnecessary blank line Removed an unnecessary blank line to meet linux kernel coding style. Reported by checkpatch.pl Signed-off-by: Mitali Borkar Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/ipu3/include/intel-ipu3.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/media/ipu3/include/intel-ipu3.h b/drivers/staging/media/ipu3/include/intel-ipu3.h index 9b644fb23dded..3fb7fc547effb 100644 --- a/drivers/staging/media/ipu3/include/intel-ipu3.h +++ b/drivers/staging/media/ipu3/include/intel-ipu3.h @@ -74,7 +74,6 @@ struct ipu3_uapi_grid_config { (IPU3_UAPI_AWB_MAX_SETS * \ (IPU3_UAPI_AWB_SET_SIZE + IPU3_UAPI_AWB_SPARE_FOR_BUBBLES)) - /** * struct ipu3_uapi_awb_raw_buffer - AWB raw buffer * -- GitLab From 25074ea239ac92321e75009e001049886f91d850 Mon Sep 17 00:00:00 2001 From: Mitali Borkar Date: Tue, 13 Apr 2021 07:15:46 +0200 Subject: [PATCH 0173/3804] media: staging: media: intel-ipu3: reduce length of line Reduced length of line as it was exceeding 100 characters by removing comments from same line and adding it to previous line. This makes code neater, and meets linux kernel coding style. Reported by checkpatch. Signed-off-by: Mitali Borkar Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/ipu3/include/intel-ipu3.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/staging/media/ipu3/include/intel-ipu3.h b/drivers/staging/media/ipu3/include/intel-ipu3.h index 3fb7fc547effb..ce068b2bea735 100644 --- a/drivers/staging/media/ipu3/include/intel-ipu3.h +++ b/drivers/staging/media/ipu3/include/intel-ipu3.h @@ -9,8 +9,10 @@ /* from /drivers/staging/media/ipu3/include/videodev2.h */ /* Vendor specific - used for IPU3 camera sub-system */ -#define V4L2_META_FMT_IPU3_PARAMS v4l2_fourcc('i', 'p', '3', 'p') /* IPU3 processing parameters */ -#define V4L2_META_FMT_IPU3_STAT_3A v4l2_fourcc('i', 'p', '3', 's') /* IPU3 3A statistics */ +/* IPU3 processing parameters */ +#define V4L2_META_FMT_IPU3_PARAMS v4l2_fourcc('i', 'p', '3', 'p') +/* IPU3 3A statistics */ +#define V4L2_META_FMT_IPU3_STAT_3A v4l2_fourcc('i', 'p', '3', 's') /* from include/uapi/linux/v4l2-controls.h */ #define V4L2_CID_INTEL_IPU3_BASE (V4L2_CID_USER_BASE + 0x10c0) -- GitLab From 17daf473e2a48ca34b434f69c00bd2fd6fa39a4d Mon Sep 17 00:00:00 2001 From: Mitali Borkar Date: Tue, 13 Apr 2021 07:16:22 +0200 Subject: [PATCH 0174/3804] media: staging: media: intel-ipu3: remove space before tabs Removed unnecessary space before tabs to adhere to linux kernel coding style. Reported by checkpatch. Signed-off-by: Mitali Borkar Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/ipu3/include/intel-ipu3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/media/ipu3/include/intel-ipu3.h b/drivers/staging/media/ipu3/include/intel-ipu3.h index ce068b2bea735..1f4a2fd7c9f5a 100644 --- a/drivers/staging/media/ipu3/include/intel-ipu3.h +++ b/drivers/staging/media/ipu3/include/intel-ipu3.h @@ -631,7 +631,7 @@ struct ipu3_uapi_bnr_static_config_wb_gains_thr_config { * @cg: Gain coefficient for threshold calculation, [0, 31], default 8. * @ci: Intensity coefficient for threshold calculation. range [0, 0x1f] * default 6. - * format: u3.2 (3 most significant bits represent whole number, + * format: u3.2 (3 most significant bits represent whole number, * 2 least significant bits represent the fractional part * with each count representing 0.25) * e.g. 6 in binary format is 00110, that translates to 1.5 -- GitLab From 72e03872410842e6c0de27b7243fe90af5254bc0 Mon Sep 17 00:00:00 2001 From: Mitali Borkar Date: Tue, 13 Apr 2021 17:29:17 +0200 Subject: [PATCH 0175/3804] media: staging: media: intel-ipu3: line should not end with '[' Fixed the issue of line should not end with '[' by moving argument from next line to line ending with '[' and made it under 80 characters. Reported by checkpatch. Signed-off-by: Mitali Borkar Acked-by: Sakari Ailus Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/ipu3/include/intel-ipu3.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/media/ipu3/include/intel-ipu3.h b/drivers/staging/media/ipu3/include/intel-ipu3.h index 1f4a2fd7c9f5a..fa3d6ee5adf29 100644 --- a/drivers/staging/media/ipu3/include/intel-ipu3.h +++ b/drivers/staging/media/ipu3/include/intel-ipu3.h @@ -245,8 +245,8 @@ struct ipu3_uapi_ae_ccm { */ struct ipu3_uapi_ae_config { struct ipu3_uapi_ae_grid_config grid_cfg __attribute__((aligned(32))); - struct ipu3_uapi_ae_weight_elem weights[ - IPU3_UAPI_AE_WEIGHTS] __attribute__((aligned(32))); + struct ipu3_uapi_ae_weight_elem weights[IPU3_UAPI_AE_WEIGHTS] + __attribute__((aligned(32))); struct ipu3_uapi_ae_ccm ae_ccm __attribute__((aligned(32))); } __packed; -- GitLab From 7900bdc25a019159911d5ee38f83b78ac6639589 Mon Sep 17 00:00:00 2001 From: Mitali Borkar Date: Tue, 13 Apr 2021 21:50:16 +0200 Subject: [PATCH 0176/3804] media: staging: media: zoran: add spaces around '<<' operator Added spaces around '<<' operator to improve readability and meet linux kernel coding style. Reported by checkpatch Signed-off-by: Mitali Borkar Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/zoran/zr36057.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/staging/media/zoran/zr36057.h b/drivers/staging/media/zoran/zr36057.h index 71b651add35ae..a2a75fd9f5354 100644 --- a/drivers/staging/media/zoran/zr36057.h +++ b/drivers/staging/media/zoran/zr36057.h @@ -30,13 +30,13 @@ #define ZR36057_VFESPFR_HOR_DCM 14 #define ZR36057_VFESPFR_VER_DCM 8 #define ZR36057_VFESPFR_DISP_MODE 6 -#define ZR36057_VFESPFR_YUV422 (0<<3) -#define ZR36057_VFESPFR_RGB888 (1<<3) -#define ZR36057_VFESPFR_RGB565 (2<<3) -#define ZR36057_VFESPFR_RGB555 (3<<3) -#define ZR36057_VFESPFR_ERR_DIF (1<<2) -#define ZR36057_VFESPFR_PACK24 (1<<1) -#define ZR36057_VFESPFR_LITTLE_ENDIAN (1<<0) +#define ZR36057_VFESPFR_YUV422 (0 << 3) +#define ZR36057_VFESPFR_RGB888 (1 << 3) +#define ZR36057_VFESPFR_RGB565 (2 << 3) +#define ZR36057_VFESPFR_RGB555 (3 << 3) +#define ZR36057_VFESPFR_ERR_DIF (1 << 2) +#define ZR36057_VFESPFR_PACK24 (1 << 1) +#define ZR36057_VFESPFR_LITTLE_ENDIAN (1 << 0) #define ZR36057_VDTR 0x00c /* Video Display "Top" Register */ -- GitLab From 451c34dd69b80857fa50e33581db22143afa8890 Mon Sep 17 00:00:00 2001 From: Martiros Shakhzadyan Date: Tue, 13 Apr 2021 20:03:13 +0200 Subject: [PATCH 0177/3804] media: staging: media: atomisp: Minor code style changes Fixed line continuation and parenthesis alignment issues. Signed-off-by: Martiros Shakhzadyan Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/atomisp/i2c/atomisp-ov2722.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c b/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c index 912eadaffc442..90a985ee25fa8 100644 --- a/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c +++ b/drivers/staging/media/atomisp/i2c/atomisp-ov2722.c @@ -49,8 +49,8 @@ static int ov2722_read_reg(struct i2c_client *client, return -ENODEV; } - if (data_length != OV2722_8BIT && data_length != OV2722_16BIT - && data_length != OV2722_32BIT) { + if (data_length != OV2722_8BIT && data_length != OV2722_16BIT && + data_length != OV2722_32BIT) { dev_err(&client->dev, "%s error, invalid data length\n", __func__); return -EINVAL; @@ -212,8 +212,8 @@ static int __ov2722_buf_reg_array(struct i2c_client *client, } static int __ov2722_write_reg_is_consecutive(struct i2c_client *client, - struct ov2722_write_ctrl *ctrl, - const struct ov2722_reg *next) + struct ov2722_write_ctrl *ctrl, + const struct ov2722_reg *next) { if (ctrl->index == 0) return 1; -- GitLab From d7c89be51d17d629e7c550388a81858af09ad343 Mon Sep 17 00:00:00 2001 From: Aline Santana Cordeiro Date: Wed, 14 Apr 2021 15:25:37 +0200 Subject: [PATCH 0178/3804] media: staging: media: omap4iss: Remove unused macro function Remove unused macro function "v4l2_dev_to_iss_device(dev)". Signed-off-by: Aline Santana Cordeiro Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/omap4iss/iss.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/staging/media/omap4iss/iss.h b/drivers/staging/media/omap4iss/iss.h index b88f9529683c1..3f587e0007295 100644 --- a/drivers/staging/media/omap4iss/iss.h +++ b/drivers/staging/media/omap4iss/iss.h @@ -119,9 +119,6 @@ struct iss_device { unsigned int isp_subclk_resources; }; -#define v4l2_dev_to_iss_device(dev) \ - container_of(dev, struct iss_device, v4l2_dev) - int omap4iss_get_external_info(struct iss_pipeline *pipe, struct media_link *link); -- GitLab From bbbcba0267e2faff32c62905219f1d3b81f75d30 Mon Sep 17 00:00:00 2001 From: Aline Santana Cordeiro Date: Wed, 14 Apr 2021 16:06:02 +0200 Subject: [PATCH 0179/3804] media: staging: media: atomisp: pci: Correct identation in block of conditional statements in file atomisp_v4l2.c Correct identation in block of conditional statements. The function "v4l2_device_unregister_subdev()" depends on the results of the macro function "list_for_each_entry_safe()". Signed-off-by: Aline Santana Cordeiro Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/atomisp/pci/atomisp_v4l2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c index 0295e2e32d797..6d853f480e1c8 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c +++ b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c @@ -1178,7 +1178,7 @@ static void atomisp_unregister_entities(struct atomisp_device *isp) atomisp_mipi_csi2_unregister_entities(&isp->csi2_port[i]); list_for_each_entry_safe(sd, next, &isp->v4l2_dev.subdevs, list) - v4l2_device_unregister_subdev(sd); + v4l2_device_unregister_subdev(sd); v4l2_device_unregister(&isp->v4l2_dev); media_device_unregister(&isp->media_dev); -- GitLab From 848802da8d0443befd155926ff4184e6ebffb5c0 Mon Sep 17 00:00:00 2001 From: Aline Santana Cordeiro Date: Wed, 14 Apr 2021 16:06:06 +0200 Subject: [PATCH 0180/3804] media: staging: media: atomisp: pci: Correct identation in block of conditional statements in file atomisp_acc.c Correct identation in block of conditional statements. The conditional statement depends on the results of the macro function "list_for_each_entry()". Signed-off-by: Aline Santana Cordeiro Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/atomisp/pci/atomisp_acc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/staging/media/atomisp/pci/atomisp_acc.c b/drivers/staging/media/atomisp/pci/atomisp_acc.c index f638d0bd09fe6..5e5faa4b34456 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_acc.c +++ b/drivers/staging/media/atomisp/pci/atomisp_acc.c @@ -77,8 +77,8 @@ acc_get_fw(struct atomisp_sub_device *asd, unsigned int handle) struct atomisp_acc_fw *acc_fw; list_for_each_entry(acc_fw, &asd->acc.fw, list) - if (acc_fw->handle == handle) - return acc_fw; + if (acc_fw->handle == handle) + return acc_fw; return NULL; } -- GitLab From 14bc5eb80bda1a3e3c8c2a0eab3064eaba949f3a Mon Sep 17 00:00:00 2001 From: Aline Santana Cordeiro Date: Wed, 14 Apr 2021 23:16:45 +0200 Subject: [PATCH 0181/3804] media: staging: media: atomisp: pci: Format comments according to coding-style in file atomisp_acc.c Format all comments according to the coding-style. Issue detected by checkpatch.pl. Signed-off-by: Aline Santana Cordeiro Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- drivers/staging/media/atomisp/pci/atomisp_acc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/staging/media/atomisp/pci/atomisp_acc.c b/drivers/staging/media/atomisp/pci/atomisp_acc.c index 5e5faa4b34456..9a1751895ab03 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_acc.c +++ b/drivers/staging/media/atomisp/pci/atomisp_acc.c @@ -464,9 +464,11 @@ int atomisp_acc_load_extensions(struct atomisp_sub_device *asd) continue; for (i = 0; i < ARRAY_SIZE(acc_flag_to_pipe); i++) { - /* QoS (ACC pipe) acceleration stages are currently - * allowed only in continuous mode. Skip them for - * all other modes. */ + /* + * QoS (ACC pipe) acceleration stages are + * currently allowed only in continuous mode. + * Skip them for all other modes. + */ if (!continuous && acc_flag_to_pipe[i].flag == ATOMISP_ACC_FW_LOAD_FL_ACC) -- GitLab From 73edc4da40635774100d0eb9ca2e6476e3b2b470 Mon Sep 17 00:00:00 2001 From: Aline Santana Cordeiro Date: Wed, 14 Apr 2021 23:16:48 +0200 Subject: [PATCH 0182/3804] media: staging: media: atomisp: pci: Format comments according to coding-style in file atomisp_cmd.h Format all comments according to the coding-style. Issue detected by checkpatch.pl. Signed-off-by: Aline Santana Cordeiro Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- .../staging/media/atomisp/pci/atomisp_cmd.h | 161 +++++------------- 1 file changed, 43 insertions(+), 118 deletions(-) diff --git a/drivers/staging/media/atomisp/pci/atomisp_cmd.h b/drivers/staging/media/atomisp/pci/atomisp_cmd.h index 412baeb919449..e8bdd264d31b2 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_cmd.h +++ b/drivers/staging/media/atomisp/pci/atomisp_cmd.h @@ -49,9 +49,7 @@ struct ia_css_frame; /* FIXME: check if can go */ extern int atomisp_punit_hpll_freq; -/* - * Helper function - */ +/* Helper function */ void dump_sp_dmem(struct atomisp_device *isp, unsigned int addr, unsigned int size); struct camera_mipi_info *atomisp_to_sensor_mipi_info(struct v4l2_subdev *sd); @@ -65,9 +63,7 @@ bool atomisp_buffers_queued(struct atomisp_sub_device *asd); /* ISP2401 */ bool atomisp_buffers_queued_pipe(struct atomisp_video_pipe *pipe); -/* - * Interrupt functions - */ +/* Interrupt functions */ void atomisp_msi_irq_init(struct atomisp_device *isp); void atomisp_msi_irq_uninit(struct atomisp_device *isp); void atomisp_wdt_work(struct work_struct *work); @@ -82,15 +78,10 @@ int atomisp_get_frame_pgnr(struct atomisp_device *isp, const struct ia_css_frame *frame, u32 *p_pgnr); void atomisp_delayed_init_work(struct work_struct *work); -/* - * Get internal fmt according to V4L2 fmt - */ - +/* Get internal fmt according to V4L2 fmt */ bool atomisp_is_viewfinder_support(struct atomisp_device *isp); -/* - * ISP features control function - */ +/* ISP features control function */ /* * Function to set sensor runmode by user when @@ -105,9 +96,7 @@ int atomisp_set_sensor_runmode(struct atomisp_sub_device *asd, int atomisp_gdc_cac(struct atomisp_sub_device *asd, int flag, __s32 *value); -/* - * Function to enable/disable low light mode (including ANR) - */ +/* Function to enable/disable low light mode (including ANR) */ int atomisp_low_light(struct atomisp_sub_device *asd, int flag, __s32 *value); @@ -120,91 +109,63 @@ int atomisp_xnr(struct atomisp_sub_device *asd, int flag, int *arg); int atomisp_formats(struct atomisp_sub_device *asd, int flag, struct atomisp_formats_config *config); -/* - * Function to configure noise reduction - */ +/* Function to configure noise reduction */ int atomisp_nr(struct atomisp_sub_device *asd, int flag, struct atomisp_nr_config *config); -/* - * Function to configure temporal noise reduction (TNR) - */ +/* Function to configure temporal noise reduction (TNR) */ int atomisp_tnr(struct atomisp_sub_device *asd, int flag, struct atomisp_tnr_config *config); -/* - * Function to configure black level compensation - */ +/* Function to configure black level compensation */ int atomisp_black_level(struct atomisp_sub_device *asd, int flag, struct atomisp_ob_config *config); -/* - * Function to configure edge enhancement - */ +/* Function to configure edge enhancement */ int atomisp_ee(struct atomisp_sub_device *asd, int flag, struct atomisp_ee_config *config); -/* - * Function to update Gamma table for gamma, brightness and contrast config - */ +/* Function to update Gamma table for gamma, brightness and contrast config */ int atomisp_gamma(struct atomisp_sub_device *asd, int flag, struct atomisp_gamma_table *config); -/* - * Function to update Ctc table for Chroma Enhancement - */ + +/* Function to update Ctc table for Chroma Enhancement */ int atomisp_ctc(struct atomisp_sub_device *asd, int flag, struct atomisp_ctc_table *config); -/* - * Function to update gamma correction parameters - */ +/* Function to update gamma correction parameters */ int atomisp_gamma_correction(struct atomisp_sub_device *asd, int flag, struct atomisp_gc_config *config); -/* - * Function to update Gdc table for gdc - */ +/* Function to update Gdc table for gdc */ int atomisp_gdc_cac_table(struct atomisp_sub_device *asd, int flag, struct atomisp_morph_table *config); -/* - * Function to update table for macc - */ +/* Function to update table for macc */ int atomisp_macc_table(struct atomisp_sub_device *asd, int flag, struct atomisp_macc_config *config); -/* - * Function to get DIS statistics. - */ + +/* Function to get DIS statistics. */ int atomisp_get_dis_stat(struct atomisp_sub_device *asd, struct atomisp_dis_statistics *stats); -/* - * Function to get DVS2 BQ resolution settings - */ +/* Function to get DVS2 BQ resolution settings */ int atomisp_get_dvs2_bq_resolutions(struct atomisp_sub_device *asd, struct atomisp_dvs2_bq_resolutions *bq_res); -/* - * Function to set the DIS coefficients. - */ +/* Function to set the DIS coefficients. */ int atomisp_set_dis_coefs(struct atomisp_sub_device *asd, struct atomisp_dis_coefficients *coefs); -/* - * Function to set the DIS motion vector. - */ +/* Function to set the DIS motion vector. */ int atomisp_set_dis_vector(struct atomisp_sub_device *asd, struct atomisp_dis_vector *vector); -/* - * Function to set/get 3A stat from isp - */ +/* Function to set/get 3A stat from isp */ int atomisp_3a_stat(struct atomisp_sub_device *asd, int flag, struct atomisp_3a_statistics *config); -/* - * Function to get metadata from isp - */ +/* Function to get metadata from isp */ int atomisp_get_metadata(struct atomisp_sub_device *asd, int flag, struct atomisp_metadata *config); @@ -213,84 +174,59 @@ int atomisp_get_metadata_by_type(struct atomisp_sub_device *asd, int flag, int atomisp_set_parameters(struct video_device *vdev, struct atomisp_parameters *arg); -/* - * Function to set/get isp parameters to isp - */ + +/* Function to set/get isp parameters to isp */ int atomisp_param(struct atomisp_sub_device *asd, int flag, struct atomisp_parm *config); -/* - * Function to configure color effect of the image - */ +/* Function to configure color effect of the image */ int atomisp_color_effect(struct atomisp_sub_device *asd, int flag, __s32 *effect); -/* - * Function to configure bad pixel correction - */ +/* Function to configure bad pixel correction */ int atomisp_bad_pixel(struct atomisp_sub_device *asd, int flag, __s32 *value); -/* - * Function to configure bad pixel correction params - */ +/* Function to configure bad pixel correction params */ int atomisp_bad_pixel_param(struct atomisp_sub_device *asd, int flag, struct atomisp_dp_config *config); -/* - * Function to enable/disable video image stablization - */ +/* Function to enable/disable video image stablization */ int atomisp_video_stable(struct atomisp_sub_device *asd, int flag, __s32 *value); -/* - * Function to configure fixed pattern noise - */ +/* Function to configure fixed pattern noise */ int atomisp_fixed_pattern(struct atomisp_sub_device *asd, int flag, __s32 *value); -/* - * Function to configure fixed pattern noise table - */ +/* Function to configure fixed pattern noise table */ int atomisp_fixed_pattern_table(struct atomisp_sub_device *asd, struct v4l2_framebuffer *config); -/* - * Function to configure false color correction - */ +/* Function to configure false color correction */ int atomisp_false_color(struct atomisp_sub_device *asd, int flag, __s32 *value); -/* - * Function to configure false color correction params - */ +/* Function to configure false color correction params */ int atomisp_false_color_param(struct atomisp_sub_device *asd, int flag, struct atomisp_de_config *config); -/* - * Function to configure white balance params - */ +/* Function to configure white balance params */ int atomisp_white_balance_param(struct atomisp_sub_device *asd, int flag, struct atomisp_wb_config *config); int atomisp_3a_config_param(struct atomisp_sub_device *asd, int flag, struct atomisp_3a_config *config); -/* - * Function to setup digital zoom - */ +/* Function to setup digital zoom */ int atomisp_digital_zoom(struct atomisp_sub_device *asd, int flag, __s32 *value); -/* - * Function set camera_prefiles.xml current sensor pixel array size - */ +/* Function set camera_prefiles.xml current sensor pixel array size */ int atomisp_set_array_res(struct atomisp_sub_device *asd, struct atomisp_resolution *config); -/* - * Function to calculate real zoom region for every pipe - */ +/* Function to calculate real zoom region for every pipe */ int atomisp_calculate_real_zoom_region(struct atomisp_sub_device *asd, struct ia_css_dz_config *dz_config, enum ia_css_pipe_id css_pipe_id); @@ -371,9 +307,7 @@ void atomisp_css_flush(struct atomisp_device *isp); int atomisp_source_pad_to_stream_id(struct atomisp_sub_device *asd, uint16_t source_pad); -/* - * Events. Only one event has to be exported for now. - */ +/* Events. Only one event has to be exported for now. */ void atomisp_eof_event(struct atomisp_sub_device *asd, uint8_t exp_id); enum mipi_port_id __get_mipi_port(struct atomisp_device *isp, @@ -389,34 +323,25 @@ void atomisp_free_css_parameters(struct atomisp_css_params *css_param); void atomisp_handle_parameter_and_buffer(struct atomisp_video_pipe *pipe); void atomisp_flush_params_queue(struct atomisp_video_pipe *asd); -/* - * Function to do Raw Buffer related operation, after enable Lock Unlock Raw Buffer - */ + +/* Function to do Raw Buffer related operation, after enable Lock Unlock Raw Buffer */ int atomisp_exp_id_unlock(struct atomisp_sub_device *asd, int *exp_id); int atomisp_exp_id_capture(struct atomisp_sub_device *asd, int *exp_id); -/* - * Function to update Raw Buffer bitmap - */ +/* Function to update Raw Buffer bitmap */ int atomisp_set_raw_buffer_bitmap(struct atomisp_sub_device *asd, int exp_id); void atomisp_init_raw_buffer_bitmap(struct atomisp_sub_device *asd); -/* - * Function to enable/disable zoom for capture pipe - */ +/* Function to enable/disable zoom for capture pipe */ int atomisp_enable_dz_capt_pipe(struct atomisp_sub_device *asd, unsigned int *enable); -/* - * Function to get metadata type bu pipe id - */ +/* Function to get metadata type bu pipe id */ enum atomisp_metadata_type atomisp_get_metadata_type(struct atomisp_sub_device *asd, enum ia_css_pipe_id pipe_id); -/* - * Function for HAL to inject a fake event to wake up poll thread - */ +/* Function for HAL to inject a fake event to wake up poll thread */ int atomisp_inject_a_fake_event(struct atomisp_sub_device *asd, int *event); /* -- GitLab From bc7c9993a0d836aa88aca2969dcb9b22031924d3 Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sun, 7 Feb 2021 18:02:33 +1100 Subject: [PATCH 0183/3804] m68k: Drop -fno-strength-reduce from KBUILD_CFLAGS This workaround became redundant either when the driver in question was removed (in Linux v2.6.23) or when the compiler flag became a no-op (in GCC v4.2). Linux has required GCC v4.6 or later since v4.19. Link: https://gcc.gnu.org/git/?p=gcc.git;a=commitdiff;h=efa1cdf01850b28c2f6f7035ebd4420259494615 References: commit 565bae6a4a8f ("[SCSI] 53c7xx: kill driver") References: commit cafa0010cd51 ("Raise the minimum required gcc version to 4.6") Signed-off-by: Finn Thain Link: https://lore.kernel.org/r/baa95d7235921dff23bed6320518f3fa90396603.1612681353.git.fthain@telegraphics.com.au Signed-off-by: Geert Uytterhoeven --- arch/m68k/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/m68k/Makefile b/arch/m68k/Makefile index 82620f14124d2..c54055a3d2845 100644 --- a/arch/m68k/Makefile +++ b/arch/m68k/Makefile @@ -66,8 +66,7 @@ KBUILD_CFLAGS += $(cpuflags-y) KBUILD_CFLAGS += -pipe -ffreestanding ifdef CONFIG_MMU -# without -fno-strength-reduce the 53c7xx.c driver fails ;-( -KBUILD_CFLAGS += -fno-strength-reduce -ffixed-a2 +KBUILD_CFLAGS += -ffixed-a2 else # we can use a m68k-linux-gcc toolchain with these in place KBUILD_CPPFLAGS += -DUTS_SYSNAME=\"uClinux\" -- GitLab From eeff86b6d18ccd7ef1e663dd428b93f5887d02b5 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Mon, 10 May 2021 11:08:35 +0800 Subject: [PATCH 0184/3804] m68k: dma: Remove unnecessary include of asm/cacheflush.h In commit ca15ca406f660 ("mm: remove unneeded includes of "), asm/cacheflush.h independent on the MACRO was included at line 18. The include here is unnecessary. Remove it. Signed-off-by: Wan Jiabing Link: https://lore.kernel.org/r/20210510030836.11834-1-wanjiabing@vivo.com Signed-off-by: Geert Uytterhoeven --- arch/m68k/kernel/dma.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c index 1c1b875fadc18..2e192a5df949b 100644 --- a/arch/m68k/kernel/dma.c +++ b/arch/m68k/kernel/dma.c @@ -34,9 +34,6 @@ pgprot_t pgprot_dmacoherent(pgprot_t prot) return prot; } #else - -#include - void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { -- GitLab From f279b49f13bd2151bbe402a1d812c1e3646c4bbb Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Mon, 10 May 2021 01:28:40 -0700 Subject: [PATCH 0185/3804] x86/boot: Modernize genimage script; hdimage+EFI support The image generation scripts in arch/x86/boot are pretty out of date, except for the isoimage target. Update and clean up the genimage.sh script, and make it support an arbitrary number of initramfs files in the image. Add a "hdimage" target, which can be booted by either BIOS or EFI (if the kernel is compiled with the EFI stub.) For EFI to be able to pass the command line to the kernel, we need the EFI shell, but the firmware builtin EFI shell, if it even exists, is pretty much always the last resort boot option, so search for OVMF or EDK2 and explicitly include a copy of the EFI shell. To make this all work, use bash features in the script. Furthermore, this version of the script makes use of some mtools features, especially mpartition, that might not exist in very old version of mtools, but given all the other dependencies on this script this doesn't seem such a big deal. Finally, put a volume label ("LINUX_BOOT") on all generated images. Signed-off-by: H. Peter Anvin (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210510082840.628372-1-hpa@zytor.com --- arch/x86/Makefile | 5 +- arch/x86/boot/.gitignore | 1 + arch/x86/boot/Makefile | 44 ++--- arch/x86/boot/genimage.sh | 303 +++++++++++++++++++++++++---------- arch/x86/boot/mtools.conf.in | 3 + 5 files changed, 252 insertions(+), 104 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index c77c5d8a7b3eb..d42764c60a0a4 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -256,7 +256,7 @@ drivers-$(CONFIG_FB) += arch/x86/video/ boot := arch/x86/boot -BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 isoimage +BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 hdimage isoimage PHONY += bzImage $(BOOT_TARGETS) @@ -314,8 +314,9 @@ define archhelp echo ' fdimage - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' echo ' fdimage144 - Create 1.4MB boot floppy image (arch/x86/boot/fdimage)' echo ' fdimage288 - Create 2.8MB boot floppy image (arch/x86/boot/fdimage)' + echo ' hdimage - Create a BIOS/EFI hard disk image (arch/x86/boot/hdimage)' echo ' isoimage - Create a boot CD-ROM image (arch/x86/boot/image.iso)' - echo ' bzdisk/fdimage*/isoimage also accept:' + echo ' bzdisk/fdimage*/hdimage/isoimage also accept:' echo ' FDARGS="..." arguments for the booted kernel' echo ' FDINITRD=file initrd for the booted kernel' echo '' diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore index 9cc7f1357b9b8..1189be057ebd6 100644 --- a/arch/x86/boot/.gitignore +++ b/arch/x86/boot/.gitignore @@ -11,3 +11,4 @@ setup.elf fdimage mtools.conf image.iso +hdimage diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index fe605205b4ce2..dfbc26a8e9241 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -29,7 +29,7 @@ KCOV_INSTRUMENT := n SVGA_MODE := -DSVGA_MODE=NORMAL_VGA targets := vmlinux.bin setup.bin setup.elf bzImage -targets += fdimage fdimage144 fdimage288 image.iso mtools.conf +targets += fdimage fdimage144 fdimage288 image.iso hdimage subdir- := compressed setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o @@ -115,47 +115,49 @@ $(obj)/compressed/vmlinux: FORCE $(Q)$(MAKE) $(build)=$(obj)/compressed $@ # Set this if you want to pass append arguments to the -# bzdisk/fdimage/isoimage kernel +# bzdisk/fdimage/hdimage/isoimage kernel FDARGS = -# Set this if you want an initrd included with the -# bzdisk/fdimage/isoimage kernel +# Set this if you want one or more initrds included in the image FDINITRD = -image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,) +imgdeps = $(obj)/bzImage $(obj)/mtools.conf $(src)/genimage.sh $(obj)/mtools.conf: $(src)/mtools.conf.in sed -e 's|@OBJ@|$(obj)|g' < $< > $@ +targets += mtools.conf + +# genimage.sh requires bash, but it also has a bunch of other +# external dependencies. quiet_cmd_genimage = GENIMAGE $3 -cmd_genimage = sh $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \ - $(obj)/mtools.conf '$(image_cmdline)' $(FDINITRD) +cmd_genimage = $(BASH) $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \ + $(obj)/mtools.conf '$(FDARGS)' $(FDINITRD) -PHONY += bzdisk fdimage fdimage144 fdimage288 isoimage bzlilo install +PHONY += bzdisk fdimage fdimage144 fdimage288 hdimage isoimage install # This requires write access to /dev/fd0 -bzdisk: $(obj)/bzImage $(obj)/mtools.conf +# All images require syslinux to be installed; hdimage also requires +# EDK2/OVMF if the kernel is compiled with the EFI stub. +bzdisk: $(imgdeps) $(call cmd,genimage,bzdisk,/dev/fd0) -# These require being root or having syslinux 2.02 or higher installed -fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf +fdimage fdimage144: $(imgdeps) $(call cmd,genimage,fdimage144,$(obj)/fdimage) @$(kecho) 'Kernel: $(obj)/fdimage is ready' -fdimage288: $(obj)/bzImage $(obj)/mtools.conf +fdimage288: $(imgdeps) $(call cmd,genimage,fdimage288,$(obj)/fdimage) @$(kecho) 'Kernel: $(obj)/fdimage is ready' -isoimage: $(obj)/bzImage +hdimage: $(imgdeps) + $(call cmd,genimage,hdimage,$(obj)/hdimage) + @$(kecho) 'Kernel: $(obj)/hdimage is ready' + +isoimage: $(imgdeps) $(call cmd,genimage,isoimage,$(obj)/image.iso) @$(kecho) 'Kernel: $(obj)/image.iso is ready' -bzlilo: - if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi - if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi - cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz - cp System.map $(INSTALL_PATH)/ - if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi - install: - sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh \ + $(KERNELRELEASE) $(obj)/bzImage \ System.map "$(INSTALL_PATH)" diff --git a/arch/x86/boot/genimage.sh b/arch/x86/boot/genimage.sh index 6a10d52a41452..0673fdfc1a11a 100644 --- a/arch/x86/boot/genimage.sh +++ b/arch/x86/boot/genimage.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # # This file is subject to the terms and conditions of the GNU General Public # License. See the file "COPYING" in the main directory of this archive @@ -8,15 +8,24 @@ # # Adapted from code in arch/x86/boot/Makefile by H. Peter Anvin and others # -# "make fdimage/fdimage144/fdimage288/isoimage" script for x86 architecture +# "make fdimage/fdimage144/fdimage288/hdimage/isoimage" +# script for x86 architecture # # Arguments: -# $1 - fdimage format -# $2 - target image file -# $3 - kernel bzImage file -# $4 - mtool configuration file -# $5 - kernel cmdline -# $6 - inird image file +# $1 - fdimage format +# $2 - target image file +# $3 - kernel bzImage file +# $4 - mtools configuration file +# $5 - kernel cmdline +# $6+ - initrd image file(s) +# +# This script requires: +# bash +# syslinux +# mtools (for fdimage* and hdimage) +# edk2/OVMF (for hdimage) +# +# Otherwise try to stick to POSIX shell commands... # # Use "make V=1" to debug this script @@ -26,105 +35,237 @@ case "${KBUILD_VERBOSE}" in ;; esac -verify () { - if [ ! -f "$1" ]; then - echo "" 1>&2 - echo " *** Missing file: $1" 1>&2 - echo "" 1>&2 - exit 1 +# Exit the top-level shell with an error +topshell=$$ +trap 'exit 1' USR1 +die() { + echo "" 1>&2 + echo " *** $*" 1>&2 + echo "" 1>&2 + kill -USR1 $topshell +} + +# Verify the existence and readability of a file +verify() { + if [ ! -f "$1" -o ! -r "$1" ]; then + die "Missing file: $1" fi } +diskfmt="$1" +FIMAGE="$2" +FBZIMAGE="$3" +MTOOLSRC="$4" +KCMDLINE="$5" +shift 5 # Remaining arguments = initrd files + +export MTOOLSRC -export MTOOLSRC=$4 -FIMAGE=$2 -FBZIMAGE=$3 -KCMDLINE=$5 -FDINITRD=$6 +# common options for dd +dd='dd iflag=fullblock' # Make sure the files actually exist verify "$FBZIMAGE" -genbzdisk() { - verify "$MTOOLSRC" - mformat a: - syslinux $FIMAGE - echo "$KCMDLINE" | mcopy - a:syslinux.cfg - if [ -f "$FDINITRD" ] ; then - mcopy "$FDINITRD" a:initrd.img +declare -a FDINITRDS +irdpfx=' initrd=' +initrdopts_syslinux='' +initrdopts_efi='' +for f in "$@"; do + if [ -f "$f" -a -r "$f" ]; then + FDINITRDS=("${FDINITRDS[@]}" "$f") + fname="$(basename "$f")" + initrdopts_syslinux="${initrdopts_syslinux}${irdpfx}${fname}" + irdpfx=, + initrdopts_efi="${initrdopts_efi} initrd=${fname}" fi - mcopy $FBZIMAGE a:linux +done + +# Read a $3-byte littleendian unsigned value at offset $2 from file $1 +le() { + local n=0 + local m=1 + for b in $(od -A n -v -j $2 -N $3 -t u1 "$1"); do + n=$((n + b*m)) + m=$((m * 256)) + done + echo $n } -genfdimage144() { - verify "$MTOOLSRC" - dd if=/dev/zero of=$FIMAGE bs=1024 count=1440 2> /dev/null - mformat v: - syslinux $FIMAGE - echo "$KCMDLINE" | mcopy - v:syslinux.cfg - if [ -f "$FDINITRD" ] ; then - mcopy "$FDINITRD" v:initrd.img - fi - mcopy $FBZIMAGE v:linux +# Get the EFI architecture name such that boot{name}.efi is the default +# boot file name. Returns false with no output if the file is not an +# EFI image or otherwise unknown. +efiarch() { + [ -f "$1" ] || return + [ $(le "$1" 0 2) -eq 23117 ] || return # MZ magic + peoffs=$(le "$1" 60 4) # PE header offset + [ $peoffs -ge 64 ] || return + [ $(le "$1" $peoffs 4) -eq 17744 ] || return # PE magic + case $(le "$1" $((peoffs+4+20)) 2) in # PE type + 267) ;; # PE32 + 523) ;; # PE32+ + *) return 1 ;; # Invalid + esac + [ $(le "$1" $((peoffs+4+20+68)) 2) -eq 10 ] || return # EFI app + case $(le "$1" $((peoffs+4)) 2) in # Machine type + 332) echo i386 ;; + 450) echo arm ;; + 512) echo ia64 ;; + 20530) echo riscv32 ;; + 20580) echo riscv64 ;; + 20776) echo riscv128 ;; + 34404) echo x64 ;; + 43620) echo aa64 ;; + esac } -genfdimage288() { - verify "$MTOOLSRC" - dd if=/dev/zero of=$FIMAGE bs=1024 count=2880 2> /dev/null - mformat w: - syslinux $FIMAGE - echo "$KCMDLINE" | mcopy - W:syslinux.cfg - if [ -f "$FDINITRD" ] ; then - mcopy "$FDINITRD" w:initrd.img - fi - mcopy $FBZIMAGE w:linux +# Get the combined sizes in bytes of the files given, counting sparse +# files as full length, and padding each file to a 4K block size +filesizes() { + local t=0 + local s + for s in $(ls -lnL "$@" 2>/dev/null | awk '/^-/{ print $5; }'); do + t=$((t + ((s+4095)/4096)*4096)) + done + echo $t } -geniso() { - tmp_dir=`dirname $FIMAGE`/isoimage - rm -rf $tmp_dir - mkdir $tmp_dir - for i in lib lib64 share ; do - for j in syslinux ISOLINUX ; do - if [ -f /usr/$i/$j/isolinux.bin ] ; then - isolinux=/usr/$i/$j/isolinux.bin - fi +# Expand directory names which should be in /usr/share into a list +# of possible alternatives +sharedirs() { + local dir file + for dir in /usr/share /usr/lib64 /usr/lib; do + for file; do + echo "$dir/$file" + echo "$dir/${file^^}" done - for j in syslinux syslinux/modules/bios ; do - if [ -f /usr/$i/$j/ldlinux.c32 ]; then - ldlinux=/usr/$i/$j/ldlinux.c32 - fi + done +} +efidirs() { + local dir file + for dir in /usr/share /boot /usr/lib64 /usr/lib; do + for file; do + echo "$dir/$file" + echo "$dir/${file^^}" done - if [ -n "$isolinux" -a -n "$ldlinux" ] ; then - break + done +} + +findsyslinux() { + local f="$(find -L $(sharedirs syslinux isolinux) \ + -name "$1" -readable -type f -print -quit 2>/dev/null)" + if [ ! -f "$f" ]; then + die "Need a $1 file, please install syslinux/isolinux." + fi + echo "$f" + return 0 +} + +findovmf() { + local arch="$1" + shift + local -a names=(-false) + local name f + for name; do + names=("${names[@]}" -or -iname "$name") + done + for f in $(find -L $(efidirs edk2 ovmf) \ + \( "${names[@]}" \) -readable -type f \ + -print 2>/dev/null); do + if [ "$(efiarch "$f")" = "$arch" ]; then + echo "$f" + return 0 fi done - if [ -z "$isolinux" ] ; then - echo 'Need an isolinux.bin file, please install syslinux/isolinux.' - exit 1 + die "Need a $1 file for $arch, please install EDK2/OVMF." +} + +do_mcopy() { + if [ ${#FDINITRDS[@]} -gt 0 ]; then + mcopy "${FDINITRDS[@]}" "$1" + fi + if [ -n "$efishell" ]; then + mmd "$1"EFI "$1"EFI/Boot + mcopy "$efishell" "$1"EFI/Boot/boot${kefiarch}.efi fi - if [ -z "$ldlinux" ] ; then - echo 'Need an ldlinux.c32 file, please install syslinux/isolinux.' - exit 1 + if [ -n "$kefiarch" ]; then + echo linux "$KCMDLINE$initrdopts_efi" | \ + mcopy - "$1"startup.nsh fi - cp $isolinux $tmp_dir - cp $ldlinux $tmp_dir - cp $FBZIMAGE $tmp_dir/linux - echo "$KCMDLINE" > $tmp_dir/isolinux.cfg - if [ -f "$FDINITRD" ] ; then - cp "$FDINITRD" $tmp_dir/initrd.img + echo default linux "$KCMDLINE$initrdopts_syslinux" | \ + mcopy - "$1"syslinux.cfg + mcopy "$FBZIMAGE" "$1"linux +} + +genbzdisk() { + verify "$MTOOLSRC" + mformat -v 'LINUX_BOOT' a: + syslinux "$FIMAGE" + do_mcopy a: +} + +genfdimage144() { + verify "$MTOOLSRC" + $dd if=/dev/zero of="$FIMAGE" bs=1024 count=1440 2>/dev/null + mformat -v 'LINUX_BOOT' v: + syslinux "$FIMAGE" + do_mcopy v: +} + +genfdimage288() { + verify "$MTOOLSRC" + $dd if=/dev/zero of="$FIMAGE" bs=1024 count=2880 2>/dev/null + mformat -v 'LINUX_BOOT' w: + syslinux "$FIMAGE" + do_mcopy w: +} + +genhdimage() { + verify "$MTOOLSRC" + mbr="$(findsyslinux mbr.bin)" + kefiarch="$(efiarch "$FBZIMAGE")" + if [ -n "$kefiarch" ]; then + # The efishell provides command line handling + efishell="$(findovmf $kefiarch shell.efi shell${kefiarch}.efi)" + ptype='-T 0xef' # EFI system partition, no GPT fi - genisoimage -J -r -input-charset=utf-8 -quiet -o $FIMAGE \ - -b isolinux.bin -c boot.cat -no-emul-boot -boot-load-size 4 \ - -boot-info-table $tmp_dir - isohybrid $FIMAGE 2>/dev/null || true - rm -rf $tmp_dir + sizes=$(filesizes "$FBZIMAGE" "${FDINITRDS[@]}" "$efishell") + # Allow 1% + 1 MiB for filesystem and partition table overhead, + # syslinux, and config files + megs=$(((sizes + sizes/100 + 2*1024*1024 - 1)/(1024*1024))) + $dd if=/dev/zero of="$FIMAGE" bs=$((1024*1024)) count=$megs 2>/dev/null + mpartition -I -c -s 32 -h 64 -t $megs $ptype -b 512 -a h: + $dd if="$mbr" of="$FIMAGE" bs=440 count=1 conv=notrunc 2>/dev/null + mformat -v 'LINUX_BOOT' -s 32 -h 64 -t $megs h: + syslinux --offset $((512*512)) "$FIMAGE" + do_mcopy h: +} + +geniso() { + tmp_dir="$(dirname "$FIMAGE")/isoimage" + rm -rf "$tmp_dir" + mkdir "$tmp_dir" + isolinux=$(findsyslinux isolinux.bin) + ldlinux=$(findsyslinux ldlinux.c32) + cp "$isolinux" "$ldlinux" "$tmp_dir" + cp "$FBZIMAGE" "$tmp_dir"/linux + echo default linux "$KCMDLINE" > "$tmp_dir"/isolinux.cfg + cp "${FDINITRDS[@]}" "$tmp_dir"/ + genisoimage -J -r -appid 'LINUX_BOOT' -input-charset=utf-8 \ + -quiet -o "$FIMAGE" -b isolinux.bin \ + -c boot.cat -no-emul-boot -boot-load-size 4 \ + -boot-info-table "$tmp_dir" + isohybrid "$FIMAGE" 2>/dev/null || true + rm -rf "$tmp_dir" } -case $1 in +rm -f "$FIMAGE" + +case "$diskfmt" in bzdisk) genbzdisk;; fdimage144) genfdimage144;; fdimage288) genfdimage288;; + hdimage) genhdimage;; isoimage) geniso;; - *) echo 'Unknown image format'; exit 1; + *) die "Unknown image format: $diskfmt";; esac diff --git a/arch/x86/boot/mtools.conf.in b/arch/x86/boot/mtools.conf.in index efd6d2490c1d5..9e2662d013641 100644 --- a/arch/x86/boot/mtools.conf.in +++ b/arch/x86/boot/mtools.conf.in @@ -14,4 +14,7 @@ drive v: drive w: file="@OBJ@/fdimage" cylinders=80 heads=2 sectors=36 filter +# Hard disk +drive h: + file="@OBJ@/hdimage" partition=1 mformat_only -- GitLab From be5bb8021c9731f5593de6419ae35d3f16a3e497 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Mon, 10 May 2021 02:09:38 -0700 Subject: [PATCH 0186/3804] x86/asm: Have the __ASM_FORM macros handle commas in arguments The __ASM_FORM macros are really useful, but in order to be able to use them to define instructions via .byte directives breaks because of the necessary commas. Change the macros to handle commas correctly. [ mingo: Removed stray whitespaces & aligned the definitions vertically. ] Signed-off-by: H. Peter Anvin (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210510090940.924953-2-hpa@zytor.com --- arch/x86/include/asm/asm.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 0603c7423aca2..93aad0b638065 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -3,25 +3,24 @@ #define _ASM_X86_ASM_H #ifdef __ASSEMBLY__ -# define __ASM_FORM(x) x -# define __ASM_FORM_RAW(x) x -# define __ASM_FORM_COMMA(x) x, +# define __ASM_FORM(x, ...) x,## __VA_ARGS__ +# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__ +# define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__, #else #include - -# define __ASM_FORM(x) " " __stringify(x) " " -# define __ASM_FORM_RAW(x) __stringify(x) -# define __ASM_FORM_COMMA(x) " " __stringify(x) "," +# define __ASM_FORM(x, ...) " " __stringify(x,##__VA_ARGS__) " " +# define __ASM_FORM_RAW(x, ...) __stringify(x,##__VA_ARGS__) +# define __ASM_FORM_COMMA(x, ...) " " __stringify(x,##__VA_ARGS__) "," #endif #ifndef __x86_64__ /* 32 bit */ -# define __ASM_SEL(a,b) __ASM_FORM(a) -# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) +# define __ASM_SEL(a,b) __ASM_FORM(a) +# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) #else /* 64 bit */ -# define __ASM_SEL(a,b) __ASM_FORM(b) -# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) +# define __ASM_SEL(a,b) __ASM_FORM(b) +# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) #endif #define __ASM_SIZE(inst, ...) __ASM_SEL(inst##l##__VA_ARGS__, \ -- GitLab From d88be187a6e6f3a97dfa7ddc500bb9ca191b3772 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Mon, 10 May 2021 02:09:39 -0700 Subject: [PATCH 0187/3804] x86/asm: Add _ASM_BYTES() macro for a .byte ... opcode sequence Make it easy to create a sequence of bytes that can be used in either assembly proper on in a C asm() statement. Signed-off-by: H. Peter Anvin (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210510090940.924953-3-hpa@zytor.com --- arch/x86/include/asm/asm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 93aad0b638065..507a37a460276 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -13,6 +13,8 @@ # define __ASM_FORM_COMMA(x, ...) " " __stringify(x,##__VA_ARGS__) "," #endif +#define _ASM_BYTES(x, ...) __ASM_FORM(.byte x,##__VA_ARGS__ ;) + #ifndef __x86_64__ /* 32 bit */ # define __ASM_SEL(a,b) __ASM_FORM(a) -- GitLab From eef23e72b78b36924aea8be5ec7c54e628c442ef Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Mon, 10 May 2021 02:09:40 -0700 Subject: [PATCH 0188/3804] x86/asm: Use _ASM_BYTES() in Use the new generalized _ASM_BYTES() macro from instead of the "home grown" _ASM_MK_NOP() in . Add and update in the tools directory... Signed-off-by: H. Peter Anvin (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210510090940.924953-4-hpa@zytor.com --- arch/x86/include/asm/nops.h | 24 ++-- tools/arch/x86/include/asm/asm.h | 189 ++++++++++++++++++++++++++++++ tools/arch/x86/include/asm/nops.h | 24 ++-- 3 files changed, 209 insertions(+), 28 deletions(-) create mode 100644 tools/arch/x86/include/asm/asm.h diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h index c1e5e818ba160..c5573eaa5bb98 100644 --- a/arch/x86/include/asm/nops.h +++ b/arch/x86/include/asm/nops.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_NOPS_H #define _ASM_X86_NOPS_H +#include + /* * Define nops for use with alternative() and for tracing. */ @@ -57,20 +59,14 @@ #endif /* CONFIG_64BIT */ -#ifdef __ASSEMBLY__ -#define _ASM_MK_NOP(x) .byte x -#else -#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n" -#endif - -#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1) -#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2) -#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3) -#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4) -#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5) -#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6) -#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7) -#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8) +#define ASM_NOP1 _ASM_BYTES(BYTES_NOP1) +#define ASM_NOP2 _ASM_BYTES(BYTES_NOP2) +#define ASM_NOP3 _ASM_BYTES(BYTES_NOP3) +#define ASM_NOP4 _ASM_BYTES(BYTES_NOP4) +#define ASM_NOP5 _ASM_BYTES(BYTES_NOP5) +#define ASM_NOP6 _ASM_BYTES(BYTES_NOP6) +#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7) +#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8) #define ASM_NOP_MAX 8 diff --git a/tools/arch/x86/include/asm/asm.h b/tools/arch/x86/include/asm/asm.h new file mode 100644 index 0000000000000..507a37a460276 --- /dev/null +++ b/tools/arch/x86/include/asm/asm.h @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_ASM_H +#define _ASM_X86_ASM_H + +#ifdef __ASSEMBLY__ +# define __ASM_FORM(x, ...) x,## __VA_ARGS__ +# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__ +# define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__, +#else +#include +# define __ASM_FORM(x, ...) " " __stringify(x,##__VA_ARGS__) " " +# define __ASM_FORM_RAW(x, ...) __stringify(x,##__VA_ARGS__) +# define __ASM_FORM_COMMA(x, ...) " " __stringify(x,##__VA_ARGS__) "," +#endif + +#define _ASM_BYTES(x, ...) __ASM_FORM(.byte x,##__VA_ARGS__ ;) + +#ifndef __x86_64__ +/* 32 bit */ +# define __ASM_SEL(a,b) __ASM_FORM(a) +# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(a) +#else +/* 64 bit */ +# define __ASM_SEL(a,b) __ASM_FORM(b) +# define __ASM_SEL_RAW(a,b) __ASM_FORM_RAW(b) +#endif + +#define __ASM_SIZE(inst, ...) __ASM_SEL(inst##l##__VA_ARGS__, \ + inst##q##__VA_ARGS__) +#define __ASM_REG(reg) __ASM_SEL_RAW(e##reg, r##reg) + +#define _ASM_PTR __ASM_SEL(.long, .quad) +#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8) + +#define _ASM_MOV __ASM_SIZE(mov) +#define _ASM_INC __ASM_SIZE(inc) +#define _ASM_DEC __ASM_SIZE(dec) +#define _ASM_ADD __ASM_SIZE(add) +#define _ASM_SUB __ASM_SIZE(sub) +#define _ASM_XADD __ASM_SIZE(xadd) +#define _ASM_MUL __ASM_SIZE(mul) + +#define _ASM_AX __ASM_REG(ax) +#define _ASM_BX __ASM_REG(bx) +#define _ASM_CX __ASM_REG(cx) +#define _ASM_DX __ASM_REG(dx) +#define _ASM_SP __ASM_REG(sp) +#define _ASM_BP __ASM_REG(bp) +#define _ASM_SI __ASM_REG(si) +#define _ASM_DI __ASM_REG(di) + +#ifndef __x86_64__ +/* 32 bit */ + +#define _ASM_ARG1 _ASM_AX +#define _ASM_ARG2 _ASM_DX +#define _ASM_ARG3 _ASM_CX + +#define _ASM_ARG1L eax +#define _ASM_ARG2L edx +#define _ASM_ARG3L ecx + +#define _ASM_ARG1W ax +#define _ASM_ARG2W dx +#define _ASM_ARG3W cx + +#define _ASM_ARG1B al +#define _ASM_ARG2B dl +#define _ASM_ARG3B cl + +#else +/* 64 bit */ + +#define _ASM_ARG1 _ASM_DI +#define _ASM_ARG2 _ASM_SI +#define _ASM_ARG3 _ASM_DX +#define _ASM_ARG4 _ASM_CX +#define _ASM_ARG5 r8 +#define _ASM_ARG6 r9 + +#define _ASM_ARG1Q rdi +#define _ASM_ARG2Q rsi +#define _ASM_ARG3Q rdx +#define _ASM_ARG4Q rcx +#define _ASM_ARG5Q r8 +#define _ASM_ARG6Q r9 + +#define _ASM_ARG1L edi +#define _ASM_ARG2L esi +#define _ASM_ARG3L edx +#define _ASM_ARG4L ecx +#define _ASM_ARG5L r8d +#define _ASM_ARG6L r9d + +#define _ASM_ARG1W di +#define _ASM_ARG2W si +#define _ASM_ARG3W dx +#define _ASM_ARG4W cx +#define _ASM_ARG5W r8w +#define _ASM_ARG6W r9w + +#define _ASM_ARG1B dil +#define _ASM_ARG2B sil +#define _ASM_ARG3B dl +#define _ASM_ARG4B cl +#define _ASM_ARG5B r8b +#define _ASM_ARG6B r9b + +#endif + +/* + * Macros to generate condition code outputs from inline assembly, + * The output operand must be type "bool". + */ +#ifdef __GCC_ASM_FLAG_OUTPUTS__ +# define CC_SET(c) "\n\t/* output condition code " #c "*/\n" +# define CC_OUT(c) "=@cc" #c +#else +# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n" +# define CC_OUT(c) [_cc_ ## c] "=qm" +#endif + +/* Exception table entry */ +#ifdef __ASSEMBLY__ +# define _ASM_EXTABLE_HANDLE(from, to, handler) \ + .pushsection "__ex_table","a" ; \ + .balign 4 ; \ + .long (from) - . ; \ + .long (to) - . ; \ + .long (handler) - . ; \ + .popsection + +# define _ASM_EXTABLE(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) + +# define _ASM_EXTABLE_UA(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) + +# define _ASM_EXTABLE_CPY(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy) + +# define _ASM_EXTABLE_FAULT(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) + +# ifdef CONFIG_KPROBES +# define _ASM_NOKPROBE(entry) \ + .pushsection "_kprobe_blacklist","aw" ; \ + _ASM_ALIGN ; \ + _ASM_PTR (entry); \ + .popsection +# else +# define _ASM_NOKPROBE(entry) +# endif + +#else /* ! __ASSEMBLY__ */ +# define _EXPAND_EXTABLE_HANDLE(x) #x +# define _ASM_EXTABLE_HANDLE(from, to, handler) \ + " .pushsection \"__ex_table\",\"a\"\n" \ + " .balign 4\n" \ + " .long (" #from ") - .\n" \ + " .long (" #to ") - .\n" \ + " .long (" _EXPAND_EXTABLE_HANDLE(handler) ") - .\n" \ + " .popsection\n" + +# define _ASM_EXTABLE(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_default) + +# define _ASM_EXTABLE_UA(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess) + +# define _ASM_EXTABLE_CPY(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy) + +# define _ASM_EXTABLE_FAULT(from, to) \ + _ASM_EXTABLE_HANDLE(from, to, ex_handler_fault) + +/* For C file, we already have NOKPROBE_SYMBOL macro */ + +/* + * This output constraint should be used for any inline asm which has a "call" + * instruction. Otherwise the asm may be inserted before the frame pointer + * gets set up by the containing function. If you forget to do this, objtool + * may print a "call without frame pointer save/setup" warning. + */ +register unsigned long current_stack_pointer asm(_ASM_SP); +#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer) +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_ASM_H */ diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h index c1e5e818ba160..c5573eaa5bb98 100644 --- a/tools/arch/x86/include/asm/nops.h +++ b/tools/arch/x86/include/asm/nops.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_NOPS_H #define _ASM_X86_NOPS_H +#include + /* * Define nops for use with alternative() and for tracing. */ @@ -57,20 +59,14 @@ #endif /* CONFIG_64BIT */ -#ifdef __ASSEMBLY__ -#define _ASM_MK_NOP(x) .byte x -#else -#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n" -#endif - -#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1) -#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2) -#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3) -#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4) -#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5) -#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6) -#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7) -#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8) +#define ASM_NOP1 _ASM_BYTES(BYTES_NOP1) +#define ASM_NOP2 _ASM_BYTES(BYTES_NOP2) +#define ASM_NOP3 _ASM_BYTES(BYTES_NOP3) +#define ASM_NOP4 _ASM_BYTES(BYTES_NOP4) +#define ASM_NOP5 _ASM_BYTES(BYTES_NOP5) +#define ASM_NOP6 _ASM_BYTES(BYTES_NOP6) +#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7) +#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8) #define ASM_NOP_MAX 8 -- GitLab From e5af36b2adb858e982d78d41d7363d05d951a19a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 21 Apr 2021 19:40:56 +0200 Subject: [PATCH 0189/3804] cpufreq: intel_pstate: Use HWP if enabled by platform firmware It turns out that there are systems where HWP is enabled during initialization by the platform firmware (BIOS), but HWP EPP support is not advertised. After commit 7aa1031223bc ("cpufreq: intel_pstate: Avoid enabling HWP if EPP is not supported") intel_pstate refuses to use HWP on those systems, but the fallback PERF_CTL interface does not work on them either because of enabled HWP, and once enabled, HWP cannot be disabled. Consequently, the users of those systems cannot control CPU performance scaling. Address this issue by making intel_pstate use HWP unconditionally if it is enabled already when the driver starts. Fixes: 7aa1031223bc ("cpufreq: intel_pstate: Avoid enabling HWP if EPP is not supported") Reported-by: Srinivas Pandruvada Tested-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki Cc: 5.9+ # 5.9+ --- drivers/cpufreq/intel_pstate.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index f0401064d7aa5..0e69dffd5a767 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3033,6 +3033,14 @@ static const struct x86_cpu_id hwp_support_ids[] __initconst = { {} }; +static bool intel_pstate_hwp_is_enabled(void) +{ + u64 value; + + rdmsrl(MSR_PM_ENABLE, value); + return !!(value & 0x1); +} + static int __init intel_pstate_init(void) { const struct x86_cpu_id *id; @@ -3051,8 +3059,12 @@ static int __init intel_pstate_init(void) * Avoid enabling HWP for processors without EPP support, * because that means incomplete HWP implementation which is a * corner case and supporting it is generally problematic. + * + * If HWP is enabled already, though, there is no choice but to + * deal with it. */ - if (!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) { + if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) || + intel_pstate_hwp_is_enabled()) { hwp_active++; hwp_mode_bdw = id->driver_data; intel_pstate.attr = hwp_cpufreq_attrs; -- GitLab From d4335d058f8430a0ce2b43dab9531f3a3cf9fe2c Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 10 May 2021 11:38:44 +0100 Subject: [PATCH 0190/3804] ASoC: codecs: lpass-rx-macro: add missing MODULE_DEVICE_TABLE Fix module loading by adding missing MODULE_DEVICE_TABLE. Signed-off-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210510103844.1532-1-srinivas.kandagatla@linaro.org Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-rx-macro.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c index 4f1b569d7c472..e074c7908c232 100644 --- a/sound/soc/codecs/lpass-rx-macro.c +++ b/sound/soc/codecs/lpass-rx-macro.c @@ -3579,6 +3579,7 @@ static const struct of_device_id rx_macro_dt_match[] = { { .compatible = "qcom,sm8250-lpass-rx-macro" }, { } }; +MODULE_DEVICE_TABLE(of, rx_macro_dt_match); static struct platform_driver rx_macro_driver = { .driver = { -- GitLab From 14c0c423746fe7232a093a68809a4bc6233eed60 Mon Sep 17 00:00:00 2001 From: Bixuan Cui Date: Sat, 8 May 2021 11:15:12 +0800 Subject: [PATCH 0191/3804] ASoC: codecs: lpass-tx-macro: add missing MODULE_DEVICE_TABLE This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this driver when it is built as an external module. Reported-by: Hulk Robot Signed-off-by: Bixuan Cui Reviewed-by: Srinivas Kandagatla Link: https://lore.kernel.org/r/20210508031512.53783-1-cuibixuan@huawei.com Signed-off-by: Mark Brown --- sound/soc/codecs/lpass-tx-macro.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c index 4eede9ad57bfd..3d3a6e31551b7 100644 --- a/sound/soc/codecs/lpass-tx-macro.c +++ b/sound/soc/codecs/lpass-tx-macro.c @@ -1846,6 +1846,7 @@ static const struct of_device_id tx_macro_dt_match[] = { { .compatible = "qcom,sm8250-lpass-tx-macro" }, { } }; +MODULE_DEVICE_TABLE(of, tx_macro_dt_match); static struct platform_driver tx_macro_driver = { .driver = { .name = "tx_macro", -- GitLab From b23584d6ce0212b9ad6cb7be19a7123461ed9e09 Mon Sep 17 00:00:00 2001 From: Shengjiu Wang Date: Sat, 8 May 2021 18:46:47 +0800 Subject: [PATCH 0192/3804] ASoC: ak5558: Correct the dai name for ak5552 Correct the dai name for ak5552. The name should be "ak5552-aif". Fixes: d8c5c82e4e5b ("ASoC: ak5558: Add support for ak5552") Signed-off-by: Shengjiu Wang Link: https://lore.kernel.org/r/1620470807-12056-1-git-send-email-shengjiu.wang@nxp.com Signed-off-by: Mark Brown --- sound/soc/codecs/ak5558.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/ak5558.c b/sound/soc/codecs/ak5558.c index 34aed80db0eb0..37d4600b6f2c2 100644 --- a/sound/soc/codecs/ak5558.c +++ b/sound/soc/codecs/ak5558.c @@ -307,7 +307,7 @@ static struct snd_soc_dai_driver ak5558_dai = { }; static struct snd_soc_dai_driver ak5552_dai = { - .name = "ak5558-aif", + .name = "ak5552-aif", .capture = { .stream_name = "Capture", .channels_min = 1, -- GitLab From 680ec0549a055eb464dce6ffb4bfb736ef87236e Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 9 May 2021 21:12:27 +0200 Subject: [PATCH 0193/3804] spi: spi-fsl-dspi: Fix a resource leak in an error handling path 'dspi_request_dma()' should be undone by a 'dspi_release_dma()' call in the error handling path of the probe function, as already done in the remove function Fixes: 90ba37033cb9 ("spi: spi-fsl-dspi: Add DMA support for Vybrid") Signed-off-by: Christophe JAILLET Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/d51caaac747277a1099ba8dea07acd85435b857e.1620587472.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- drivers/spi/spi-fsl-dspi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c index 0287366874882..fb45e6af66381 100644 --- a/drivers/spi/spi-fsl-dspi.c +++ b/drivers/spi/spi-fsl-dspi.c @@ -1375,11 +1375,13 @@ poll_mode: ret = spi_register_controller(ctlr); if (ret != 0) { dev_err(&pdev->dev, "Problem registering DSPI ctlr\n"); - goto out_free_irq; + goto out_release_dma; } return ret; +out_release_dma: + dspi_release_dma(dspi); out_free_irq: if (dspi->irq) free_irq(dspi->irq, dspi); -- GitLab From dc5fa590273890a8541ce6e999d606bfb2d73797 Mon Sep 17 00:00:00 2001 From: Leilk Liu Date: Sat, 8 May 2021 14:02:14 +0800 Subject: [PATCH 0194/3804] spi: take the SPI IO-mutex in the spi_set_cs_timing method this patch takes the io_mutex to prevent an unprotected HW register modification in the set_cs_timing callback. Fixes: 4cea6b8cc34e ("spi: add power control when set_cs_timing") Signed-off-by: Leilk Liu Link: https://lore.kernel.org/r/20210508060214.1485-1-leilk.liu@mediatek.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index f9885c0965637..a565e7d6bf3ba 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -3457,9 +3457,12 @@ int spi_set_cs_timing(struct spi_device *spi, struct spi_delay *setup, if (spi->controller->set_cs_timing && !(spi->cs_gpiod || gpio_is_valid(spi->cs_gpio))) { + mutex_lock(&spi->controller->io_mutex); + if (spi->controller->auto_runtime_pm) { status = pm_runtime_get_sync(parent); if (status < 0) { + mutex_unlock(&spi->controller->io_mutex); pm_runtime_put_noidle(parent); dev_err(&spi->controller->dev, "Failed to power device: %d\n", status); @@ -3470,11 +3473,13 @@ int spi_set_cs_timing(struct spi_device *spi, struct spi_delay *setup, hold, inactive); pm_runtime_mark_last_busy(parent); pm_runtime_put_autosuspend(parent); - return status; } else { - return spi->controller->set_cs_timing(spi, setup, hold, + status = spi->controller->set_cs_timing(spi, setup, hold, inactive); } + + mutex_unlock(&spi->controller->io_mutex); + return status; } if ((setup && setup->unit == SPI_DELAY_UNIT_SCK) || -- GitLab From a3bc4ffeedf4693262fe7c6d133dcfcacd3d18c2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 May 2021 11:48:26 -0300 Subject: [PATCH 0195/3804] tools headers UAPI: Update tools's copy of drm.h headers Picking the changes from: b603e810f740e76b ("drm/uapi: document kernel capabilities") Doesn't result in any tooling changes: $ tools/perf/trace/beauty/drm_ioctl.sh > before $ cp include/uapi/drm/drm.h tools/include/uapi/drm/drm.h $ tools/perf/trace/beauty/drm_ioctl.sh > after $ diff -u before after Silencing these perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/drm/drm.h' differs from latest version at 'include/uapi/drm/drm.h' diff -u tools/include/uapi/drm/drm.h include/uapi/drm/drm.h Cc: Simon Ser Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 125 +++++++++++++++++++++++++++++++++-- 1 file changed, 121 insertions(+), 4 deletions(-) diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 0827037c54847..67b94bc3c8852 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -625,30 +625,147 @@ struct drm_gem_open { __u64 size; }; +/** + * DRM_CAP_DUMB_BUFFER + * + * If set to 1, the driver supports creating dumb buffers via the + * &DRM_IOCTL_MODE_CREATE_DUMB ioctl. + */ #define DRM_CAP_DUMB_BUFFER 0x1 +/** + * DRM_CAP_VBLANK_HIGH_CRTC + * + * If set to 1, the kernel supports specifying a CRTC index in the high bits of + * &drm_wait_vblank_request.type. + * + * Starting kernel version 2.6.39, this capability is always set to 1. + */ #define DRM_CAP_VBLANK_HIGH_CRTC 0x2 +/** + * DRM_CAP_DUMB_PREFERRED_DEPTH + * + * The preferred bit depth for dumb buffers. + * + * The bit depth is the number of bits used to indicate the color of a single + * pixel excluding any padding. This is different from the number of bits per + * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per + * pixel. + * + * Note that this preference only applies to dumb buffers, it's irrelevant for + * other types of buffers. + */ #define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3 +/** + * DRM_CAP_DUMB_PREFER_SHADOW + * + * If set to 1, the driver prefers userspace to render to a shadow buffer + * instead of directly rendering to a dumb buffer. For best speed, userspace + * should do streaming ordered memory copies into the dumb buffer and never + * read from it. + * + * Note that this preference only applies to dumb buffers, it's irrelevant for + * other types of buffers. + */ #define DRM_CAP_DUMB_PREFER_SHADOW 0x4 +/** + * DRM_CAP_PRIME + * + * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT + * and &DRM_PRIME_CAP_EXPORT. + * + * PRIME buffers are exposed as dma-buf file descriptors. See + * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing". + */ #define DRM_CAP_PRIME 0x5 +/** + * DRM_PRIME_CAP_IMPORT + * + * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME + * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl. + */ #define DRM_PRIME_CAP_IMPORT 0x1 +/** + * DRM_PRIME_CAP_EXPORT + * + * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME + * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl. + */ #define DRM_PRIME_CAP_EXPORT 0x2 +/** + * DRM_CAP_TIMESTAMP_MONOTONIC + * + * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in + * struct drm_event_vblank. If set to 1, the kernel will report timestamps with + * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these + * clocks. + * + * Starting from kernel version 2.6.39, the default value for this capability + * is 1. Starting kernel version 4.15, this capability is always set to 1. + */ #define DRM_CAP_TIMESTAMP_MONOTONIC 0x6 +/** + * DRM_CAP_ASYNC_PAGE_FLIP + * + * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC. + */ #define DRM_CAP_ASYNC_PAGE_FLIP 0x7 -/* - * The CURSOR_WIDTH and CURSOR_HEIGHT capabilities return a valid widthxheight - * combination for the hardware cursor. The intention is that a hardware - * agnostic userspace can query a cursor plane size to use. +/** + * DRM_CAP_CURSOR_WIDTH + * + * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid + * width x height combination for the hardware cursor. The intention is that a + * hardware agnostic userspace can query a cursor plane size to use. * * Note that the cross-driver contract is to merely return a valid size; * drivers are free to attach another meaning on top, eg. i915 returns the * maximum plane size. */ #define DRM_CAP_CURSOR_WIDTH 0x8 +/** + * DRM_CAP_CURSOR_HEIGHT + * + * See &DRM_CAP_CURSOR_WIDTH. + */ #define DRM_CAP_CURSOR_HEIGHT 0x9 +/** + * DRM_CAP_ADDFB2_MODIFIERS + * + * If set to 1, the driver supports supplying modifiers in the + * &DRM_IOCTL_MODE_ADDFB2 ioctl. + */ #define DRM_CAP_ADDFB2_MODIFIERS 0x10 +/** + * DRM_CAP_PAGE_FLIP_TARGET + * + * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and + * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in + * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP + * ioctl. + */ #define DRM_CAP_PAGE_FLIP_TARGET 0x11 +/** + * DRM_CAP_CRTC_IN_VBLANK_EVENT + * + * If set to 1, the kernel supports reporting the CRTC ID in + * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and + * &DRM_EVENT_FLIP_COMPLETE events. + * + * Starting kernel version 4.12, this capability is always set to 1. + */ #define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12 +/** + * DRM_CAP_SYNCOBJ + * + * If set to 1, the driver supports sync objects. See + * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". + */ #define DRM_CAP_SYNCOBJ 0x13 +/** + * DRM_CAP_SYNCOBJ_TIMELINE + * + * If set to 1, the driver supports timeline operations on sync objects. See + * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects". + */ #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 /* DRM_IOCTL_GET_CAP ioctl argument type */ -- GitLab From 0fdee797d60d71e5a6fd59aa573d84a858e715dd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 May 2021 11:51:17 -0300 Subject: [PATCH 0196/3804] tools headers UAPI: Sync drm/i915_drm.h with the kernel sources To pick the changes in: b5b6f6a610127b17 ("drm/i915/gem: Drop legacy execbuffer support (v2)") That don't result in any change in tooling as this is just adding a comment. Only silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h Cc: Daniel Vetter Cc: Jason Ekstrand Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/i915_drm.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 1987e2ea79a3b..ddc47bbf48b6d 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -943,6 +943,7 @@ struct drm_i915_gem_exec_object { __u64 offset; }; +/* DRM_IOCTL_I915_GEM_EXECBUFFER was removed in Linux 5.13 */ struct drm_i915_gem_execbuffer { /** * List of buffers to be validated with their relocations to be -- GitLab From b3172585b13d7171c32cfabdf938eca7fdfe9b31 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 3 May 2021 11:53:37 -0300 Subject: [PATCH 0197/3804] tools arch x86: Sync the msr-index.h copy with the kernel sources To pick up the changes from these csets: d0946a882e622022 ("perf/x86/intel: Hybrid PMU support for perf capabilities") That cause no changes to tooling as it isn't adding any new MSR, just some capabilities for a pre-existing one: $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > before $ cp arch/x86/include/asm/msr-index.h tools/arch/x86/include/asm/msr-index.h $ tools/perf/trace/beauty/tracepoints/x86_msr.sh > after $ diff -u before after $ Just silences this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/msr-index.h' differs from latest version at 'arch/x86/include/asm/msr-index.h' diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Cc: Kan Liang Cc: Peter Zijlstra Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/msr-index.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 45029354e0a8b..742d89a00721d 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -185,6 +185,9 @@ #define MSR_PEBS_DATA_CFG 0x000003f2 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 +#define PERF_CAP_METRICS_IDX 15 +#define PERF_CAP_PT_IDX 16 + #define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 #define MSR_IA32_RTIT_CTL 0x00000570 @@ -265,6 +268,7 @@ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) #define DEBUGCTLMSR_TR (1UL << 6) #define DEBUGCTLMSR_BTS (1UL << 7) #define DEBUGCTLMSR_BTINT (1UL << 8) -- GitLab From e8c1167606c63fd8f9934d0b6ce80281463a4945 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Fri, 2 Apr 2021 18:40:20 +0900 Subject: [PATCH 0198/3804] perf record: Disallow -c and -F option at the same time It's confusing which one is effective when the both options are given. The current code happens to use -c in this case but users might not be aware of it. We can change it to complain about that instead of relying on the implicit priority. Before: $ perf record -c 111111 -F 99 true [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.031 MB perf.data (8 samples) ] $ perf evlist -F cycles: sample_period=111111 $ After: $ perf record -c 111111 -F 99 true cannot set frequency and period at the same time $ So this change can break existing usages, but I think it's rare to have both options and it'd be better changing them. Suggested-by: Alexey Alexandrov Signed-off-by: Namhyung Kim Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210402094020.28164-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/record.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index f99852d54b147..43e5b563dee89 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -157,9 +157,15 @@ static int get_max_rate(unsigned int *rate) static int record_opts__config_freq(struct record_opts *opts) { bool user_freq = opts->user_freq != UINT_MAX; + bool user_interval = opts->user_interval != ULLONG_MAX; unsigned int max_rate; - if (opts->user_interval != ULLONG_MAX) + if (user_interval && user_freq) { + pr_err("cannot set frequency and period at the same time\n"); + return -1; + } + + if (user_interval) opts->default_interval = opts->user_interval; if (user_freq) opts->freq = opts->user_freq; -- GitLab From 7aa3c9eabdf76017679e975e2ffd50cde3c010b8 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 6 May 2021 15:56:40 -0700 Subject: [PATCH 0199/3804] perf jevents: Silence warning for ArchStd files JSON files in the level 1 directory are used for ArchStd events (see preprocess_arch_std_files), as such they shouldn't be warned about. Signed-off-by: Ian Rogers Reviewed-by: John Garry Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Joakim Zhang Cc: Kajol Jain Cc: Kim Phillips Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210506225640.1461000-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/jevents.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index ed4f0bd72e5a3..7422b0ea87901 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -1123,8 +1123,10 @@ static int process_one_file(const char *fpath, const struct stat *sb, mapfile = strdup(fpath); return 0; } - - pr_info("%s: Ignoring file %s\n", prog, fpath); + if (is_json_file(bname)) + pr_debug("%s: ArchStd json is preprocessed %s\n", prog, fpath); + else + pr_info("%s: Ignoring file %s\n", prog, fpath); return 0; } -- GitLab From a11c9a6e472457cf9eeafb585fc5c912f51d1b23 Mon Sep 17 00:00:00 2001 From: Dmitry Koshelev Date: Thu, 6 May 2021 13:11:49 +0000 Subject: [PATCH 0200/3804] perf session: Fix swapping of cpu_map and stat_config records 'data' field in perf_record_cpu_map_data struct is 16-bit wide and so should be swapped using bswap_16(). 'nr' field in perf_record_stat_config struct should be swapped before being used for size calculation. Signed-off-by: Dmitry Koshelev Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kan Liang Cc: Leo Yan Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210506131244.13328-1-karaghiozis@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index a12cf4f0e97a7..106b3d60881a5 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -904,7 +904,7 @@ static void perf_event__cpu_map_swap(union perf_event *event, struct perf_record_record_cpu_map *mask; unsigned i; - data->type = bswap_64(data->type); + data->type = bswap_16(data->type); switch (data->type) { case PERF_CPU_MAP__CPUS: @@ -937,7 +937,7 @@ static void perf_event__stat_config_swap(union perf_event *event, { u64 size; - size = event->stat_config.nr * sizeof(event->stat_config.data[0]); + size = bswap_64(event->stat_config.nr) * sizeof(event->stat_config.data[0]); size += 1; /* nr item itself */ mem_bswap_64(&event->stat_config.nr, size); } -- GitLab From ad1237c30d975535a669746496cbed136aa5a045 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 8 May 2021 22:50:20 +0200 Subject: [PATCH 0201/3804] perf tools: Fix dynamic libbpf link Justin reported broken build with LIBBPF_DYNAMIC=1. When linking libbpf dynamically we need to use perf's hashmap object, because it's not exported in libbpf.so (only in libbpf.a). Following build is now passing: $ make LIBBPF_DYNAMIC=1 BUILD: Doing 'make -j8' parallel build ... $ ldd perf | grep libbpf libbpf.so.0 => /lib64/libbpf.so.0 (0x00007fa7630db000) Fixes: eee19501926d ("perf tools: Grab a copy of libbpf's hashmap") Reported-by: Justin M. Forbes Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210508205020.617984-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 1 + tools/perf/util/Build | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 0d6619064a838..406a9519145e5 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -540,6 +540,7 @@ ifndef NO_LIBELF ifdef LIBBPF_DYNAMIC ifeq ($(feature-libbpf), 1) EXTLIBS += -lbpf + $(call detected,CONFIG_LIBBPF_DYNAMIC) else dummy := $(error Error: No libbpf devel library found, please install libbpf-devel); endif diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8c0d9f368ebcf..b64bdc1a7026d 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -145,7 +145,14 @@ perf-$(CONFIG_LIBELF) += symbol-elf.o perf-$(CONFIG_LIBELF) += probe-file.o perf-$(CONFIG_LIBELF) += probe-event.o +ifdef CONFIG_LIBBPF_DYNAMIC + hashmap := 1 +endif ifndef CONFIG_LIBBPF + hashmap := 1 +endif + +ifdef hashmap perf-y += hashmap.o endif -- GitLab From 0d943d5fde6070c2661a99618ea95b99655589ad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 09:39:02 -0300 Subject: [PATCH 0202/3804] tools headers UAPI: Sync linux/kvm.h with the kernel sources To pick the changes in: 15fb7de1a7f5af0d ("KVM: SVM: Add KVM_SEV_RECEIVE_UPDATE_DATA command") 3bf725699bf62494 ("KVM: arm64: Add support for the KVM PTP service") 4cfdd47d6d95aca4 ("KVM: SVM: Add KVM_SEV SEND_START command") 54526d1fd59338fd ("KVM: x86: Support KVM VMs sharing SEV context") 5569e2e7a650dfff ("KVM: SVM: Add support for KVM_SEV_SEND_CANCEL command") 8b13c36493d8cb56 ("KVM: introduce KVM_CAP_SET_GUEST_DEBUG2") af43cbbf954b50ca ("KVM: SVM: Add support for KVM_SEV_RECEIVE_START command") d3d1af85e2c75bb5 ("KVM: SVM: Add KVM_SEND_UPDATE_DATA command") fe7e948837f312d8 ("KVM: x86: Add capability to grant VM access to privileged SGX attribute") That don't cause any change in tooling as it doesn't introduce any new ioctl. $ grep kvm tools/perf/trace/beauty/*.sh tools/perf/trace/beauty/kvm_ioctl.sh:printf "static const char *kvm_ioctl_cmds[] = {\n" tools/perf/trace/beauty/kvm_ioctl.sh:egrep $regex ${header_dir}/kvm.h | \ $ $ tools/perf/trace/beauty/kvm_ioctl.sh > before $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > after $ diff -u before after $ This silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: Brijesh Singh Cc: Jianyong Wu Cc: Marc Zyngier Cc: Nathan Tempelman Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Steve Rutherford Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 45 ++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index f6afee209620d..3fd9a7e9d90cd 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1078,6 +1078,10 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_DIRTY_LOG_RING 192 #define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_PPC_DAWR1 194 +#define KVM_CAP_SET_GUEST_DEBUG2 195 +#define KVM_CAP_SGX_ATTRIBUTE 196 +#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197 +#define KVM_CAP_PTP_KVM 198 #ifdef KVM_CAP_IRQ_ROUTING @@ -1671,6 +1675,8 @@ enum sev_cmd_id { KVM_SEV_CERT_EXPORT, /* Attestation report */ KVM_SEV_GET_ATTESTATION_REPORT, + /* Guest Migration Extension */ + KVM_SEV_SEND_CANCEL, KVM_SEV_NR_MAX, }; @@ -1729,6 +1735,45 @@ struct kvm_sev_attestation_report { __u32 len; }; +struct kvm_sev_send_start { + __u32 policy; + __u64 pdh_cert_uaddr; + __u32 pdh_cert_len; + __u64 plat_certs_uaddr; + __u32 plat_certs_len; + __u64 amd_certs_uaddr; + __u32 amd_certs_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_send_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + +struct kvm_sev_receive_start { + __u32 handle; + __u32 policy; + __u64 pdh_uaddr; + __u32 pdh_len; + __u64 session_uaddr; + __u32 session_len; +}; + +struct kvm_sev_receive_update_data { + __u64 hdr_uaddr; + __u32 hdr_len; + __u64 guest_uaddr; + __u32 guest_len; + __u64 trans_uaddr; + __u32 trans_len; +}; + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) -- GitLab From b35629bc2fd59691504debda99c320cf966c8e3a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 09:45:25 -0300 Subject: [PATCH 0203/3804] tools headers kvm: Sync kvm headers with the kernel sources To pick the changes in: 3c0c2ad1ae75963c ("KVM: VMX: Add basic handling of VM-Exit from SGX enclave") None of them trigger any changes in tooling, this time this is just to silence these perf build warnings: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/vmx.h' differs from latest version at 'arch/x86/include/uapi/asm/vmx.h' diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h Cc: Paolo Bonzini Cc: Sean Christopherson Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/vmx.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index b8e650a985e35..946d761adbd3d 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -27,6 +27,7 @@ #define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000 +#define VMX_EXIT_REASONS_SGX_ENCLAVE_MODE 0x08000000 #define EXIT_REASON_EXCEPTION_NMI 0 #define EXIT_REASON_EXTERNAL_INTERRUPT 1 -- GitLab From a00b7e39d6b56e6f49cdd51a9ebf92627a19d877 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 7 May 2021 17:54:35 +0900 Subject: [PATCH 0204/3804] perf tools: Fix a build error on arm64 with clang Since clang's -Wmissing-field-initializers warns if a data structure is initialized with a signle NULL as below, ---- tools/perf $ make CC=clang LLVM=1 ... arch/arm64/util/kvm-stat.c:74:9: error: missing field 'ops' initializer [-Werror,-Wmissing-field-initializers] { NULL }, ^ 1 error generated. ---- add another field initializer expressly as same as other arch's kvm-stat.c code. Signed-off-by: Masami Hiramatsu Cc: Anders Roxell Cc: Leo Yan Cc: Sergey Senozhatsky Link: http://lore.kernel.org/lkml/162037767540.94840.15758657049033010518.stgit@devnote2 Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/kvm-stat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c index 2303256b7d05e..73d18e0ed6f6a 100644 --- a/tools/perf/arch/arm64/util/kvm-stat.c +++ b/tools/perf/arch/arm64/util/kvm-stat.c @@ -71,7 +71,7 @@ struct kvm_reg_events_ops kvm_reg_events_ops[] = { .name = "vmexit", .ops = &exit_events, }, - { NULL }, + { NULL, NULL }, }; const char * const kvm_skip_events[] = { -- GitLab From f8bcb061ea013a9b39a071b9dd9f6ea0aa2caf72 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 09:55:30 -0300 Subject: [PATCH 0205/3804] tools headers UAPI: Sync files changed by landlock, quotactl_path and mount_settattr new syscalls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To pick the changes in these csets: a49f4f81cb48925e ("arch: Wire up Landlock syscalls") 2a1867219c7b27f9 ("fs: add mount_setattr()") fa8b90070a80bb1a ("quota: wire up quotactl_path") That silences these perf build warnings and add support for those new syscalls in tools such as 'perf trace'. For instance, this is now possible: # ~acme/bin/perf trace -v -e landlock* event qualifier tracepoint filter: (common_pid != 129365 && common_pid != 3502) && (id == 444 || id == 445 || id == 446) ^C# That is tha filter expression attached to the raw_syscalls:sys_{enter,exit} tracepoints. $ grep landlock tools/perf/arch/x86/entry/syscalls/syscall_64.tbl 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self $ This addresses these perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h' diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl' diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl Warning: Kernel ABI header at 'tools/perf/arch/s390/entry/syscalls/syscall.tbl' differs from latest version at 'arch/s390/kernel/syscalls/syscall.tbl' diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl Warning: Kernel ABI header at 'tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl' differs from latest version at 'arch/mips/kernel/syscalls/syscall_n64.tbl' diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl Cc: Christian Brauner Cc: James Morris Cc: Jan Kara Cc: Mickaël Salaün Cc: Sascha Hauer Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 11 ++++++++++- tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl | 5 +++++ tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 4 ++++ tools/perf/arch/s390/entry/syscalls/syscall.tbl | 4 ++++ tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 4 ++++ 5 files changed, 27 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index ce58cff99b665..6de5a7fc066b8 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -863,9 +863,18 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise) __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) #define __NR_mount_setattr 442 __SYSCALL(__NR_mount_setattr, sys_mount_setattr) +#define __NR_quotactl_path 443 +__SYSCALL(__NR_quotactl_path, sys_quotactl_path) + +#define __NR_landlock_create_ruleset 444 +__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset) +#define __NR_landlock_add_rule 445 +__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) +#define __NR_landlock_restrict_self 446 +__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) #undef __NR_syscalls -#define __NR_syscalls 443 +#define __NR_syscalls 447 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index 91649690b52f1..9974f5f8e49bc 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -356,3 +356,8 @@ 439 n64 faccessat2 sys_faccessat2 440 n64 process_madvise sys_process_madvise 441 n64 epoll_pwait2 sys_epoll_pwait2 +442 n64 mount_setattr sys_mount_setattr +443 n64 quotactl_path sys_quotactl_path +444 n64 landlock_create_ruleset sys_landlock_create_ruleset +445 n64 landlock_add_rule sys_landlock_add_rule +446 n64 landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 0b2480cf3e479..2e68fbb57cc66 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -522,3 +522,7 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr +443 common quotactl_path sys_quotactl_path +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 3abef2144dac7..7e4a2aba366df 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -445,3 +445,7 @@ 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr sys_mount_setattr +443 common quotactl_path sys_quotactl_path sys_quotactl_path +444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 7bf01cbe582f0..ecd551b08d052 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -364,6 +364,10 @@ 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 442 common mount_setattr sys_mount_setattr +443 common quotactl_path sys_quotactl_path +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self # # Due to a historical design error, certain syscalls are numbered differently -- GitLab From 5a80ee4219a52194f0e815bbceec40eb32c523ec Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 10:06:40 -0300 Subject: [PATCH 0206/3804] tools headers UAPI: Sync linux/prctl.h with the kernel sources To pick a new prctl introduced in: 201698626fbca1cf ("arm64: Introduce prctl(PR_PAC_{SET,GET}_ENABLED_KEYS)") That results in $ grep prctl tools/perf/trace/beauty/*.sh tools/perf/trace/beauty/prctl_option.sh:printf "static const char *prctl_options[] = {\n" tools/perf/trace/beauty/prctl_option.sh:egrep $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \ tools/perf/trace/beauty/prctl_option.sh:printf "static const char *prctl_set_mm_options[] = {\n" tools/perf/trace/beauty/prctl_option.sh:egrep $regex ${header_dir}/prctl.h | \ tools/perf/trace/beauty/x86_arch_prctl.sh:prctl_arch_header=${x86_header_dir}/prctl.h tools/perf/trace/beauty/x86_arch_prctl.sh: printf "#define x86_arch_prctl_codes_%d_offset %s\n" $idx $first_entry tools/perf/trace/beauty/x86_arch_prctl.sh: printf "static const char *x86_arch_prctl_codes_%d[] = {\n" $idx tools/perf/trace/beauty/x86_arch_prctl.sh: egrep -q $regex ${prctl_arch_header} && \ tools/perf/trace/beauty/x86_arch_prctl.sh: (egrep $regex ${prctl_arch_header} | \ $ tools/perf/trace/beauty/prctl_option.sh > before $ cp include/uapi/linux/prctl.h tools/include/uapi/linux/prctl.h $ tools/perf/trace/beauty/prctl_option.sh > after $ diff -u before after --- before 2021-05-09 10:06:10.064559675 -0300 +++ after 2021-05-09 10:06:21.319791396 -0300 @@ -54,6 +54,8 @@ [57] = "SET_IO_FLUSHER", [58] = "GET_IO_FLUSHER", [59] = "SET_SYSCALL_USER_DISPATCH", + [60] = "PAC_SET_ENABLED_KEYS", + [61] = "PAC_GET_ENABLED_KEYS", }; static const char *prctl_set_mm_options[] = { [1] = "START_CODE", $ Now users can do: # perf trace -e syscalls:sys_enter_prctl --filter "option==PAC_GET_ENABLED_KEYS" ^C# # trace -v -e syscalls:sys_enter_prctl --filter "option==PAC_GET_ENABLED_KEYS" New filter for syscalls:sys_enter_prctl: (option==0x3d) && (common_pid != 5519 && common_pid != 3404) ^C# And also when prctl appears in a session, its options will be translated to the string. Cc: Catalin Marinas Cc: Peter Collingbourne Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/prctl.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 667f1aed091c2..18a9f59dc067f 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -255,4 +255,8 @@ struct prctl_mm_map { # define SYSCALL_DISPATCH_FILTER_ALLOW 0 # define SYSCALL_DISPATCH_FILTER_BLOCK 1 +/* Set/get enabled arm64 pointer authentication keys */ +#define PR_PAC_SET_ENABLED_KEYS 60 +#define PR_PAC_GET_ENABLED_KEYS 61 + #endif /* _LINUX_PRCTL_H */ -- GitLab From fb24e308b6310541e70d11a3f19dc40742974b95 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 10:19:37 -0300 Subject: [PATCH 0207/3804] tools arch: Update arch/x86/lib/mem{cpy,set}_64.S copies used in 'perf bench mem memcpy' To bring in the change made in this cset: 5e21a3ecad1500e3 ("x86/alternative: Merge include files") This just silences these perf tools build warnings, no change in the tools: Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S' diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S' diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S Cc: Borislav Petkov Cc: Juergen Gross Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/lib/memcpy_64.S | 2 +- tools/arch/x86/lib/memset_64.S | 2 +- tools/include/asm/{alternative-asm.h => alternative.h} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename tools/include/asm/{alternative-asm.h => alternative.h} (100%) diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 1e299ac73c869..1cc9da6e29c79 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include .pushsection .noinstr.text, "ax" diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 0bfd26e4ca9e9..9827ae267f96e 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -3,7 +3,7 @@ #include #include -#include +#include #include /* diff --git a/tools/include/asm/alternative-asm.h b/tools/include/asm/alternative.h similarity index 100% rename from tools/include/asm/alternative-asm.h rename to tools/include/asm/alternative.h -- GitLab From 3916329309eace19e8c32bc821064a119474c309 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 10:21:33 -0300 Subject: [PATCH 0208/3804] tools include UAPI powerpc: Sync errno.h with the kernel headers To pick the change in: 7de21e679e6a789f ("powerpc: fix EDEADLOCK redefinition error in uapi/asm/errno.h") That will make the errno number -> string tables to pick this change on powerpc. Silencing this perf build warning: Warning: Kernel ABI header at 'tools/arch/powerpc/include/uapi/asm/errno.h' differs from latest version at 'arch/powerpc/include/uapi/asm/errno.h' diff -u tools/arch/powerpc/include/uapi/asm/errno.h arch/powerpc/include/uapi/asm/errno.h Cc: Michael Ellerman Cc: Tony Ambardar Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/powerpc/include/uapi/asm/errno.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/arch/powerpc/include/uapi/asm/errno.h b/tools/arch/powerpc/include/uapi/asm/errno.h index cc79856896a19..4ba87de32be00 100644 --- a/tools/arch/powerpc/include/uapi/asm/errno.h +++ b/tools/arch/powerpc/include/uapi/asm/errno.h @@ -2,6 +2,7 @@ #ifndef _ASM_POWERPC_ERRNO_H #define _ASM_POWERPC_ERRNO_H +#undef EDEADLOCK #include #undef EDEADLOCK -- GitLab From 6faf64f5248166ecaf50107e883c383e0b66bb70 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 10:24:29 -0300 Subject: [PATCH 0209/3804] tools headers cpufeatures: Sync with the kernel sources To pick the changes from: 4e6292114c741221 ("x86/paravirt: Add new features for paravirt patching") a161545ab53b174c ("x86/cpufeatures: Enumerate Intel Hybrid Technology feature bit") a89dfde3dc3c2dbf ("x86: Remove dynamic NOP selection") b8921dccf3b25798 ("x86/cpufeatures: Add SGX1 and SGX2 sub-features") f21d4d3b97a86035 ("x86/cpufeatures: Enumerate #DB for bus lock detection") f333374e108e7e4c ("x86/cpufeatures: Add the Virtual SPEC_CTRL feature") This only causes these perf files to be rebuilt: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Babu Moger Cc: Borislav Petkov Cc: Fenghua Yu Cc: Juergen Gross Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Ricardo Neri Cc: Sean Christopherson Cc: Thomas Gleixner Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index cc96e26d69f7a..ac37830ae9412 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -84,7 +84,7 @@ /* CPU types for specific tunings: */ #define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */ +/* FREE, was #define X86_FEATURE_K7 ( 3*32+ 5) "" Athlon */ #define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ #define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ @@ -236,6 +236,8 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */ +#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -290,6 +292,8 @@ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ @@ -336,6 +340,7 @@ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ +#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ @@ -354,6 +359,7 @@ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */ #define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ #define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ #define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ @@ -374,6 +380,7 @@ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ +#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ -- GitLab From 71d7924b3e8acaca6a3b0fc3261170031ada3b70 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 9 May 2021 10:29:10 -0300 Subject: [PATCH 0210/3804] tools headers UAPI: Sync perf_event.h with the kernel sources To pick up the changes in: 2b26f0aa004995f4 ("perf: Support only inheriting events if cloned with CLONE_THREAD") 2e498d0a74e5b88a ("perf: Add support for event removal on exec") 547b60988e631f74 ("perf: aux: Add flags for the buffer format") 55bcf6ef314ae8ba ("perf: Extend PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE") 7dde51767ca5339e ("perf: aux: Add CoreSight PMU buffer formats") 97ba62b278674293 ("perf: Add support for SIGTRAP on perf events") d0d1dd628527c77d ("perf core: Add PERF_COUNT_SW_CGROUP_SWITCHES event") Also change the expected sizeof(struct perf_event_attr) from 120 to 128 due to fields being added for the SIGTRAP changes. Addressing this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/perf_event.h' differs from latest version at 'include/uapi/linux/perf_event.h' diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Cc: Kan Liang Cc: Marco Elver Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki K Poulose Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/perf_event.h | 26 ++++++++++++++++++++----- tools/perf/tests/attr/base-record | 2 +- tools/perf/tests/attr/base-stat | 2 +- tools/perf/tests/attr/system-wide-dummy | 2 +- 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 14332f4cf8167..bf8143505c49d 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -127,6 +127,7 @@ enum perf_sw_ids { PERF_COUNT_SW_EMULATION_FAULTS = 8, PERF_COUNT_SW_DUMMY = 9, PERF_COUNT_SW_BPF_OUTPUT = 10, + PERF_COUNT_SW_CGROUP_SWITCHES = 11, PERF_COUNT_SW_MAX, /* non-ABI */ }; @@ -326,6 +327,7 @@ enum perf_event_read_format { #define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ #define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ #define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ /* * Hardware event_id to monitor via a performance monitoring event: @@ -404,7 +406,10 @@ struct perf_event_attr { cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ build_id : 1, /* use build id in mmap2 events */ - __reserved_1 : 29; + inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ + remove_on_exec : 1, /* event is removed from task on exec */ + sigtrap : 1, /* send synchronous SIGTRAP on event */ + __reserved_1 : 26; union { __u32 wakeup_events; /* wakeup every n events */ @@ -456,6 +461,12 @@ struct perf_event_attr { __u16 __reserved_2; __u32 aux_sample_size; __u32 __reserved_3; + + /* + * User provided data if sigtrap=1, passed back to user via + * siginfo_t::si_perf, e.g. to permit user to identify the event. + */ + __u64 sig_data; }; /* @@ -1171,10 +1182,15 @@ enum perf_callchain_context { /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ + +/* CoreSight PMU AUX buffer formats */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ #define PERF_FLAG_FD_NO_GROUP (1UL << 0) #define PERF_FLAG_FD_OUTPUT (1UL << 1) diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index 645009c08b3cb..4a7b8deef3fdd 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0|1 -size=120 +size=128 config=0 sample_period=* sample_type=263 diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat index b0f42c34882e8..4081644565306 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/attr/base-stat @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0 -size=120 +size=128 config=0 sample_period=0 sample_type=65536 diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/attr/system-wide-dummy index eba723cc0d380..86a15dd359d93 100644 --- a/tools/perf/tests/attr/system-wide-dummy +++ b/tools/perf/tests/attr/system-wide-dummy @@ -7,7 +7,7 @@ cpu=* pid=-1 flags=8 type=1 -size=120 +size=128 config=9 sample_period=4000 sample_type=455 -- GitLab From 29038ae2ae566d9441e81cda3539db17c20bf06a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 10 May 2021 14:02:17 +0200 Subject: [PATCH 0211/3804] Revert "Revert "ACPI: scan: Turn off unused power resources during initialization"" Revert commit 5db91e9cb5b3 ("Revert "ACPI: scan: Turn off unused power resources during initialization") which was not necessary. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/internal.h | 1 + drivers/acpi/power.c | 2 +- drivers/acpi/scan.c | 2 ++ drivers/acpi/sleep.h | 1 - 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index e6a5d997241c4..9fcefcdc1dbe0 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -139,6 +139,7 @@ int acpi_device_sleep_wake(struct acpi_device *dev, int acpi_power_get_inferred_state(struct acpi_device *device, int *state); int acpi_power_on_resources(struct acpi_device *device, int state); int acpi_power_transition(struct acpi_device *device, int state); +void acpi_turn_off_unused_power_resources(void); /* -------------------------------------------------------------------------- Device Power Management diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c index 7e69931be828c..bacae6d178ff5 100644 --- a/drivers/acpi/power.c +++ b/drivers/acpi/power.c @@ -996,6 +996,7 @@ void acpi_resume_power_resources(void) mutex_unlock(&power_resource_list_lock); } +#endif void acpi_turn_off_unused_power_resources(void) { @@ -1016,4 +1017,3 @@ void acpi_turn_off_unused_power_resources(void) mutex_unlock(&power_resource_list_lock); } -#endif diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index a184529d8fa40..1584c9e463bdf 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2360,6 +2360,8 @@ int __init acpi_scan_init(void) } } + acpi_turn_off_unused_power_resources(); + acpi_scan_initialized = true; out: diff --git a/drivers/acpi/sleep.h b/drivers/acpi/sleep.h index 1856f76ac83f7..7fe41ee489d61 100644 --- a/drivers/acpi/sleep.h +++ b/drivers/acpi/sleep.h @@ -8,7 +8,6 @@ extern struct list_head acpi_wakeup_device_list; extern struct mutex acpi_device_lock; extern void acpi_resume_power_resources(void); -extern void acpi_turn_off_unused_power_resources(void); static inline acpi_status acpi_set_waking_vector(u32 wakeup_address) { -- GitLab From 67823d9dadd4dddee4b6bd075f6852b6ade5604a Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Tue, 27 Apr 2021 14:07:11 +0100 Subject: [PATCH 0212/3804] regulator: Add a routine to set the current limit for QCOM PMIC VBUS Add hooks to regulator_get_current_limit_regmap() and regulator_set_current_limit_regmap() with an accompanying map of amperages. This lets us use the existing helper functions to map requested current settings to register bit-map/indicies. This change is required to elevate the default 2 Amps set by the bootloader to 3 Amps or indeed to constrain the value lower as the system design may dictate. The valid range is 500 mA to 3 A in increments of 500 mA. Cc: Mark Brown Signed-off-by: Bryan O'Donoghue Link: https://lore.kernel.org/r/20210427130712.2005456-2-bryan.odonoghue@linaro.org Signed-off-by: Mark Brown --- drivers/regulator/qcom_usb_vbus-regulator.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/regulator/qcom_usb_vbus-regulator.c b/drivers/regulator/qcom_usb_vbus-regulator.c index 457788b505720..2e627c2b6c512 100644 --- a/drivers/regulator/qcom_usb_vbus-regulator.c +++ b/drivers/regulator/qcom_usb_vbus-regulator.c @@ -16,13 +16,21 @@ #define CMD_OTG 0x40 #define OTG_EN BIT(0) +#define OTG_CURRENT_LIMIT_CFG 0x52 +#define OTG_CURRENT_LIMIT_MASK GENMASK(2, 0) #define OTG_CFG 0x53 #define OTG_EN_SRC_CFG BIT(1) +static const unsigned int curr_table[] = { + 500000, 1000000, 1500000, 2000000, 2500000, 3000000, +}; + static const struct regulator_ops qcom_usb_vbus_reg_ops = { .enable = regulator_enable_regmap, .disable = regulator_disable_regmap, .is_enabled = regulator_is_enabled_regmap, + .get_current_limit = regulator_get_current_limit_regmap, + .set_current_limit = regulator_set_current_limit_regmap, }; static struct regulator_desc qcom_usb_vbus_rdesc = { @@ -30,6 +38,8 @@ static struct regulator_desc qcom_usb_vbus_rdesc = { .ops = &qcom_usb_vbus_reg_ops, .owner = THIS_MODULE, .type = REGULATOR_VOLTAGE, + .curr_table = curr_table, + .n_current_limits = ARRAY_SIZE(curr_table), }; static int qcom_usb_vbus_regulator_probe(struct platform_device *pdev) @@ -61,6 +71,8 @@ static int qcom_usb_vbus_regulator_probe(struct platform_device *pdev) qcom_usb_vbus_rdesc.enable_reg = base + CMD_OTG; qcom_usb_vbus_rdesc.enable_mask = OTG_EN; + qcom_usb_vbus_rdesc.csel_reg = base + OTG_CURRENT_LIMIT_CFG; + qcom_usb_vbus_rdesc.csel_mask = OTG_CURRENT_LIMIT_MASK; config.dev = dev; config.init_data = init_data; config.of_node = dev->of_node; -- GitLab From 8c816d56a2a4e757bb121d1af4c04f47ac0572d3 Mon Sep 17 00:00:00 2001 From: Bartosz Dudziak Date: Sun, 2 May 2021 13:53:04 +0200 Subject: [PATCH 0213/3804] regulator: qcom_smd: Add PM8226 regulator support Add support for PM8226 regulator which is commonly used with MSM8226 SoCs. Signed-off-by: Bartosz Dudziak Link: https://lore.kernel.org/r/20210502115304.8570-2-bartosz.dudziak@snejp.pl Signed-off-by: Mark Brown --- drivers/regulator/qcom_smd-regulator.c | 83 ++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c index bb944ee5fe3b1..05df7b00e3b17 100644 --- a/drivers/regulator/qcom_smd-regulator.c +++ b/drivers/regulator/qcom_smd-regulator.c @@ -251,6 +251,50 @@ static const struct regulator_desc pma8084_switch = { .ops = &rpm_switch_ops, }; +static const struct regulator_desc pm8226_hfsmps = { + .linear_ranges = (struct linear_range[]) { + REGULATOR_LINEAR_RANGE(375000, 0, 95, 12500), + REGULATOR_LINEAR_RANGE(1575000, 96, 158, 25000), + }, + .n_linear_ranges = 2, + .n_voltages = 159, + .ops = &rpm_smps_ldo_ops, +}; + +static const struct regulator_desc pm8226_ftsmps = { + .linear_ranges = (struct linear_range[]) { + REGULATOR_LINEAR_RANGE(350000, 0, 184, 5000), + REGULATOR_LINEAR_RANGE(1280000, 185, 261, 10000), + }, + .n_linear_ranges = 2, + .n_voltages = 262, + .ops = &rpm_smps_ldo_ops, +}; + +static const struct regulator_desc pm8226_pldo = { + .linear_ranges = (struct linear_range[]) { + REGULATOR_LINEAR_RANGE(750000, 0, 63, 12500), + REGULATOR_LINEAR_RANGE(1550000, 64, 126, 25000), + REGULATOR_LINEAR_RANGE(3100000, 127, 163, 50000), + }, + .n_linear_ranges = 3, + .n_voltages = 164, + .ops = &rpm_smps_ldo_ops, +}; + +static const struct regulator_desc pm8226_nldo = { + .linear_ranges = (struct linear_range[]) { + REGULATOR_LINEAR_RANGE(750000, 0, 63, 12500), + }, + .n_linear_ranges = 1, + .n_voltages = 64, + .ops = &rpm_smps_ldo_ops, +}; + +static const struct regulator_desc pm8226_switch = { + .ops = &rpm_switch_ops, +}; + static const struct regulator_desc pm8x41_hfsmps = { .linear_ranges = (struct linear_range[]) { REGULATOR_LINEAR_RANGE( 375000, 0, 95, 12500), @@ -746,6 +790,44 @@ static const struct rpm_regulator_data rpm_pm8916_regulators[] = { {} }; +static const struct rpm_regulator_data rpm_pm8226_regulators[] = { + { "s1", QCOM_SMD_RPM_SMPA, 1, &pm8226_hfsmps, "vdd_s1" }, + { "s2", QCOM_SMD_RPM_SMPA, 2, &pm8226_ftsmps, "vdd_s2" }, + { "s3", QCOM_SMD_RPM_SMPA, 3, &pm8226_hfsmps, "vdd_s3" }, + { "s4", QCOM_SMD_RPM_SMPA, 4, &pm8226_hfsmps, "vdd_s4" }, + { "s5", QCOM_SMD_RPM_SMPA, 5, &pm8226_hfsmps, "vdd_s5" }, + { "l1", QCOM_SMD_RPM_LDOA, 1, &pm8226_nldo, "vdd_l1_l2_l4_l5" }, + { "l2", QCOM_SMD_RPM_LDOA, 2, &pm8226_nldo, "vdd_l1_l2_l4_l5" }, + { "l3", QCOM_SMD_RPM_LDOA, 3, &pm8226_nldo, "vdd_l3_l24_l26" }, + { "l4", QCOM_SMD_RPM_LDOA, 4, &pm8226_nldo, "vdd_l1_l2_l4_l5" }, + { "l5", QCOM_SMD_RPM_LDOA, 5, &pm8226_nldo, "vdd_l1_l2_l4_l5" }, + { "l6", QCOM_SMD_RPM_LDOA, 6, &pm8226_pldo, "vdd_l6_l7_l8_l9_l27" }, + { "l7", QCOM_SMD_RPM_LDOA, 7, &pm8226_pldo, "vdd_l6_l7_l8_l9_l27" }, + { "l8", QCOM_SMD_RPM_LDOA, 8, &pm8226_pldo, "vdd_l6_l7_l8_l9_l27" }, + { "l9", QCOM_SMD_RPM_LDOA, 9, &pm8226_pldo, "vdd_l6_l7_l8_l9_l27" }, + { "l10", QCOM_SMD_RPM_LDOA, 10, &pm8226_pldo, "vdd_l10_l11_l13" }, + { "l11", QCOM_SMD_RPM_LDOA, 11, &pm8226_pldo, "vdd_l10_l11_l13" }, + { "l12", QCOM_SMD_RPM_LDOA, 12, &pm8226_pldo, "vdd_l12_l14" }, + { "l13", QCOM_SMD_RPM_LDOA, 13, &pm8226_pldo, "vdd_l10_l11_l13" }, + { "l14", QCOM_SMD_RPM_LDOA, 14, &pm8226_pldo, "vdd_l12_l14" }, + { "l15", QCOM_SMD_RPM_LDOA, 15, &pm8226_pldo, "vdd_l15_l16_l17_l18" }, + { "l16", QCOM_SMD_RPM_LDOA, 16, &pm8226_pldo, "vdd_l15_l16_l17_l18" }, + { "l17", QCOM_SMD_RPM_LDOA, 17, &pm8226_pldo, "vdd_l15_l16_l17_l18" }, + { "l18", QCOM_SMD_RPM_LDOA, 18, &pm8226_pldo, "vdd_l15_l16_l17_l18" }, + { "l19", QCOM_SMD_RPM_LDOA, 19, &pm8226_pldo, "vdd_l19_l20_l21_l22_l23_l28" }, + { "l20", QCOM_SMD_RPM_LDOA, 20, &pm8226_pldo, "vdd_l19_l20_l21_l22_l23_l28" }, + { "l21", QCOM_SMD_RPM_LDOA, 21, &pm8226_pldo, "vdd_l19_l20_l21_l22_l23_l28" }, + { "l22", QCOM_SMD_RPM_LDOA, 22, &pm8226_pldo, "vdd_l19_l20_l21_l22_l23_l28" }, + { "l23", QCOM_SMD_RPM_LDOA, 23, &pm8226_pldo, "vdd_l19_l20_l21_l22_l23_l28" }, + { "l24", QCOM_SMD_RPM_LDOA, 24, &pm8226_nldo, "vdd_l3_l24_l26" }, + { "l25", QCOM_SMD_RPM_LDOA, 25, &pm8226_pldo, "vdd_l25" }, + { "l26", QCOM_SMD_RPM_LDOA, 26, &pm8226_nldo, "vdd_l3_l24_l26" }, + { "l27", QCOM_SMD_RPM_LDOA, 27, &pm8226_pldo, "vdd_l6_l7_l8_l9_l27" }, + { "l28", QCOM_SMD_RPM_LDOA, 28, &pm8226_pldo, "vdd_l19_l20_l21_l22_l23_l28" }, + { "lvs1", QCOM_SMD_RPM_VSA, 1, &pm8226_switch, "vdd_lvs1" }, + {} +}; + static const struct rpm_regulator_data rpm_pm8941_regulators[] = { { "s1", QCOM_SMD_RPM_SMPA, 1, &pm8x41_hfsmps, "vdd_s1" }, { "s2", QCOM_SMD_RPM_SMPA, 2, &pm8x41_hfsmps, "vdd_s2" }, @@ -1092,6 +1174,7 @@ static const struct of_device_id rpm_of_match[] = { { .compatible = "qcom,rpm-mp5496-regulators", .data = &rpm_mp5496_regulators }, { .compatible = "qcom,rpm-pm8841-regulators", .data = &rpm_pm8841_regulators }, { .compatible = "qcom,rpm-pm8916-regulators", .data = &rpm_pm8916_regulators }, + { .compatible = "qcom,rpm-pm8226-regulators", .data = &rpm_pm8226_regulators }, { .compatible = "qcom,rpm-pm8941-regulators", .data = &rpm_pm8941_regulators }, { .compatible = "qcom,rpm-pm8950-regulators", .data = &rpm_pm8950_regulators }, { .compatible = "qcom,rpm-pm8953-regulators", .data = &rpm_pm8953_regulators }, -- GitLab From 00c8b0b1e6e1314bb57aab6438fbc2803c637d9d Mon Sep 17 00:00:00 2001 From: Bartosz Dudziak Date: Sun, 2 May 2021 13:53:03 +0200 Subject: [PATCH 0214/3804] regulator: qcom: Document PM8226 smd regulator Document the PM8226 SMD-RPM regulator entry. Signed-off-by: Bartosz Dudziak Acked-by: Rob Herring Link: https://lore.kernel.org/r/20210502115304.8570-1-bartosz.dudziak@snejp.pl Signed-off-by: Mark Brown --- .../bindings/regulator/qcom,smd-rpm-regulator.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml b/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml index a35c6cb9bf972..83b53579f4635 100644 --- a/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml +++ b/Documentation/devicetree/bindings/regulator/qcom,smd-rpm-regulator.yaml @@ -24,6 +24,10 @@ description: For mp5496, s2 + For pm8226, s1, s2, s3, s4, s5, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, + l11, l12, l13, l14, l15, l16, l17, l18, l19, l20, l21, l22, l23, l24, l25, + l26, l27, l28, lvs1 + For pm8841, s1, s2, s3, s4, s5, s6, s7, s8 For pm8916, s1, s2, s3, s4, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, @@ -68,6 +72,7 @@ properties: compatible: enum: - qcom,rpm-mp5496-regulators + - qcom,rpm-pm8226-regulators - qcom,rpm-pm8841-regulators - qcom,rpm-pm8916-regulators - qcom,rpm-pm8941-regulators -- GitLab From 4446e6f3bd5c97c312833b445d0eb2ea638c7e98 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Fri, 30 Apr 2021 16:55:55 +0800 Subject: [PATCH 0215/3804] regulator: hi6421v600: Remove unneeded *pmic from struct hi6421_spmi_reg_info Use rdev->regmap instead of pmic->regmap. With this change, hi6421_spmi_regulator_disable can be removed and use regulator_disable_regmap instead. Signed-off-by: Axel Lin Link: https://lore.kernel.org/r/20210430085555.1127994-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/hi6421v600-regulator.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/drivers/regulator/hi6421v600-regulator.c b/drivers/regulator/hi6421v600-regulator.c index f6a14e9c3cbfe..feddb0b5d4f1c 100644 --- a/drivers/regulator/hi6421v600-regulator.c +++ b/drivers/regulator/hi6421v600-regulator.c @@ -18,7 +18,6 @@ struct hi6421_spmi_reg_info { struct regulator_desc desc; - struct hi6421_spmi_pmic *pmic; u8 eco_mode_mask; u32 eco_uA; @@ -98,13 +97,12 @@ static const unsigned int ldo34_voltages[] = { static int hi6421_spmi_regulator_enable(struct regulator_dev *rdev) { struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev); - struct hi6421_spmi_pmic *pmic = sreg->pmic; int ret; /* cannot enable more than one regulator at one time */ mutex_lock(&sreg->enable_mutex); - ret = regmap_update_bits(pmic->regmap, rdev->desc->enable_reg, + ret = regmap_update_bits(rdev->regmap, rdev->desc->enable_reg, rdev->desc->enable_mask, rdev->desc->enable_mask); @@ -116,22 +114,12 @@ static int hi6421_spmi_regulator_enable(struct regulator_dev *rdev) return ret; } -static int hi6421_spmi_regulator_disable(struct regulator_dev *rdev) -{ - struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev); - struct hi6421_spmi_pmic *pmic = sreg->pmic; - - return regmap_update_bits(pmic->regmap, rdev->desc->enable_reg, - rdev->desc->enable_mask, 0); -} - static unsigned int hi6421_spmi_regulator_get_mode(struct regulator_dev *rdev) { struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev); - struct hi6421_spmi_pmic *pmic = sreg->pmic; u32 reg_val; - regmap_read(pmic->regmap, rdev->desc->enable_reg, ®_val); + regmap_read(rdev->regmap, rdev->desc->enable_reg, ®_val); if (reg_val & sreg->eco_mode_mask) return REGULATOR_MODE_IDLE; @@ -143,7 +131,6 @@ static int hi6421_spmi_regulator_set_mode(struct regulator_dev *rdev, unsigned int mode) { struct hi6421_spmi_reg_info *sreg = rdev_get_drvdata(rdev); - struct hi6421_spmi_pmic *pmic = sreg->pmic; u32 val; switch (mode) { @@ -157,7 +144,7 @@ static int hi6421_spmi_regulator_set_mode(struct regulator_dev *rdev, return -EINVAL; } - return regmap_update_bits(pmic->regmap, rdev->desc->enable_reg, + return regmap_update_bits(rdev->regmap, rdev->desc->enable_reg, sreg->eco_mode_mask, val); } @@ -177,7 +164,7 @@ hi6421_spmi_regulator_get_optimum_mode(struct regulator_dev *rdev, static const struct regulator_ops hi6421_spmi_ldo_rops = { .is_enabled = regulator_is_enabled_regmap, .enable = hi6421_spmi_regulator_enable, - .disable = hi6421_spmi_regulator_disable, + .disable = regulator_disable_regmap, .list_voltage = regulator_list_voltage_table, .map_voltage = regulator_map_voltage_iterate, .get_voltage_sel = regulator_get_voltage_sel_regmap, @@ -258,7 +245,6 @@ static int hi6421_spmi_regulator_probe(struct platform_device *pdev) if (!sreg) return -ENOMEM; - sreg->pmic = pmic; mutex_init(&sreg->enable_mutex); for (i = 0; i < ARRAY_SIZE(regulator_info); i++) { -- GitLab From 66fe740317c82b0caa68ed8d756536d4ff7e910c Mon Sep 17 00:00:00 2001 From: Jay Fang Date: Mon, 10 May 2021 14:58:20 +0800 Subject: [PATCH 0216/3804] spi: ppc4xx: include instead of Include the more general linux/io.h instead of asm/io.h as checkpatch suggests. Signed-off-by: Jay Fang Link: https://lore.kernel.org/r/1620629903-15493-2-git-send-email-f.fangjian@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-ppc4xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index d8ee363fb7145..9e3974551204d 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include -- GitLab From 856a9260e17129303102a7d4a5f71b7a8739e5b9 Mon Sep 17 00:00:00 2001 From: Jay Fang Date: Mon, 10 May 2021 14:58:21 +0800 Subject: [PATCH 0217/3804] spi: omap-100k: Clean the value of 'status' is not used An error code is set to 'status' before exiting list_for_each_entry() loop, but the value of 'status' is not used as below: list_for_each_entry(t, &m->transfers, transfer_list) { if (t->tx_buf == NULL && t->rx_buf == NULL && t->len) { status = -EINVAL; break; } ... } status = omap1_spi100k_setup_transfer(spi, NULL); Signed-off-by: Jay Fang Link: https://lore.kernel.org/r/1620629903-15493-3-git-send-email-f.fangjian@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-omap-100k.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/spi/spi-omap-100k.c b/drivers/spi/spi-omap-100k.c index 7062f29022539..dc9b86b648ac0 100644 --- a/drivers/spi/spi-omap-100k.c +++ b/drivers/spi/spi-omap-100k.c @@ -296,7 +296,6 @@ static int omap1_spi100k_transfer_one_message(struct spi_master *master, list_for_each_entry(t, &m->transfers, transfer_list) { if (t->tx_buf == NULL && t->rx_buf == NULL && t->len) { - status = -EINVAL; break; } status = omap1_spi100k_setup_transfer(spi, t); @@ -315,7 +314,6 @@ static int omap1_spi100k_transfer_one_message(struct spi_master *master, m->actual_length += count; if (count != t->len) { - status = -EIO; break; } } -- GitLab From db56d03049524114696aa7158560d8f0e064c487 Mon Sep 17 00:00:00 2001 From: Jay Fang Date: Mon, 10 May 2021 14:58:22 +0800 Subject: [PATCH 0218/3804] spi: delete repeated words in comments Drop repeated words in spi-bcm2835aux.c {are} Drop repeated words in spi-dw-mmio.c {the} Drop repeated words in spi-geni-qcom.c {our} Drop repeated words in spi-pl022.c {on} Drop repeated words in spi-ppc4xx.c {the} Signed-off-by: Jay Fang Link: https://lore.kernel.org/r/1620629903-15493-4-git-send-email-f.fangjian@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-bcm2835aux.c | 2 +- drivers/spi/spi-dw-mmio.c | 2 +- drivers/spi/spi-geni-qcom.c | 4 ++-- drivers/spi/spi-pl022.c | 4 ++-- drivers/spi/spi-ppc4xx.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c index 75589ac6e95f9..37eab100a7d8a 100644 --- a/drivers/spi/spi-bcm2835aux.c +++ b/drivers/spi/spi-bcm2835aux.c @@ -384,7 +384,7 @@ static int bcm2835aux_spi_transfer_one(struct spi_master *master, bs->pending = 0; /* Calculate the estimated time in us the transfer runs. Note that - * there are are 2 idle clocks cycles after each chunk getting + * there are 2 idle clocks cycles after each chunk getting * transferred - in our case the chunk size is 3 bytes, so we * approximate this by 9 cycles/byte. This is used to find the number * of Hz per byte per polling limit. E.g., we can transfer 1 byte in diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c index 17c06039a74d1..3379720cfcb8d 100644 --- a/drivers/spi/spi-dw-mmio.c +++ b/drivers/spi/spi-dw-mmio.c @@ -56,7 +56,7 @@ struct dw_spi_mscc { /* * The Designware SPI controller (referred to as master in the documentation) * automatically deasserts chip select when the tx fifo is empty. The chip - * selects then needs to be either driven as GPIOs or, for the first 4 using the + * selects then needs to be either driven as GPIOs or, for the first 4 using * the SPI boot controller registers. the final chip select is an OR gate * between the Designware SPI controller and the SPI boot controller. */ diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c index 3d0d8ddd57720..b3861fb88711a 100644 --- a/drivers/spi/spi-geni-qcom.c +++ b/drivers/spi/spi-geni-qcom.c @@ -639,8 +639,8 @@ static irqreturn_t geni_spi_isr(int irq, void *data) complete(&mas->abort_done); /* - * It's safe or a good idea to Ack all of our our interrupts at the - * end of the function. Specifically: + * It's safe or a good idea to Ack all of our interrupts at the end + * of the function. Specifically: * - M_CMD_DONE_EN / M_RX_FIFO_LAST_EN: Edge triggered interrupts and * clearing Acks. Clearing at the end relies on nobody else having * started a new transfer yet or else we could be clearing _their_ diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c index 0c9e3f270f05a..feebda66f56eb 100644 --- a/drivers/spi/spi-pl022.c +++ b/drivers/spi/spi-pl022.c @@ -288,7 +288,7 @@ #define SPI_POLLING_TIMEOUT 1000 /* - * The type of reading going on on this chip + * The type of reading going on this chip */ enum ssp_reading { READING_NULL, @@ -298,7 +298,7 @@ enum ssp_reading { }; /* - * The type of writing going on on this chip + * The type of writing going on this chip */ enum ssp_writing { WRITING_NULL, diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index 9e3974551204d..76874a7cca9b5 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -326,7 +326,7 @@ static void spi_ppc4xx_enable(struct ppc4xx_spi *hw) { /* * On all 4xx PPC's the SPI bus is shared/multiplexed with - * the 2nd I2C bus. We need to enable the the SPI bus before + * the 2nd I2C bus. We need to enable the SPI bus before * using it. */ -- GitLab From 9e37a3ab0627011fb63875e9a93094b6fc8ddf48 Mon Sep 17 00:00:00 2001 From: Jay Fang Date: Mon, 10 May 2021 14:58:23 +0800 Subject: [PATCH 0219/3804] spi: spi-loopback-test: Fix 'tx_buf' might be 'rx_buf' In function 'spi_test_run_iter': Value 'tx_buf' might be 'rx_buf'. Signed-off-by: Jay Fang Link: https://lore.kernel.org/r/1620629903-15493-5-git-send-email-f.fangjian@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-loopback-test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-loopback-test.c b/drivers/spi/spi-loopback-test.c index f1cf2232f0b5e..4d4f77a186a98 100644 --- a/drivers/spi/spi-loopback-test.c +++ b/drivers/spi/spi-loopback-test.c @@ -875,7 +875,7 @@ static int spi_test_run_iter(struct spi_device *spi, test.transfers[i].len = len; if (test.transfers[i].tx_buf) test.transfers[i].tx_buf += tx_off; - if (test.transfers[i].tx_buf) + if (test.transfers[i].rx_buf) test.transfers[i].rx_buf += rx_off; } -- GitLab From f2eed8caa336e31d672804a8725dadba0415f19d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 23 Apr 2021 21:24:28 +0300 Subject: [PATCH 0220/3804] spi: pxa2xx: Use one point of return when ->probe() fails When we can't allocate SPI controller, jump to the error path rather than return locally. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210423182441.50272-2-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 5e59ba075bc7a..2f5618883ac3c 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1705,8 +1705,8 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) if (!controller) { dev_err(&pdev->dev, "cannot alloc spi_controller\n"); - pxa_ssp_free(ssp); - return -ENOMEM; + status = -ENOMEM; + goto out_error_controller_alloc; } drv_data = spi_controller_get_devdata(controller); drv_data->controller = controller; -- GitLab From 9e43c9a8d5de4810ea9688519d55b5e46784d84a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 23 Apr 2021 21:24:29 +0300 Subject: [PATCH 0221/3804] spi: pxa2xx: Utilize MMIO and physical base from struct ssp_device We have a duplication of MMIO and physical base addresses in the struct driver_data, get rid of it and reuse members from struct ssp_device instead. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210423182441.50272-3-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-dma.c | 4 ++-- drivers/spi/spi-pxa2xx.c | 4 +--- drivers/spi/spi-pxa2xx.h | 14 ++++---------- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c index 37567bc7a5232..3b27f356a18f3 100644 --- a/drivers/spi/spi-pxa2xx-dma.c +++ b/drivers/spi/spi-pxa2xx-dma.c @@ -94,14 +94,14 @@ pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data, cfg.direction = dir; if (dir == DMA_MEM_TO_DEV) { - cfg.dst_addr = drv_data->ssdr_physical; + cfg.dst_addr = drv_data->ssp->phys_base + SSDR; cfg.dst_addr_width = width; cfg.dst_maxburst = chip->dma_burst_size; sgt = &xfer->tx_sg; chan = drv_data->controller->dma_tx; } else { - cfg.src_addr = drv_data->ssdr_physical; + cfg.src_addr = drv_data->ssp->phys_base + SSDR; cfg.src_addr_width = width; cfg.src_maxburst = chip->dma_burst_size; diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 2f5618883ac3c..d89db682179d0 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -325,7 +325,7 @@ static void lpss_ssp_setup(struct driver_data *drv_data) u32 value; config = lpss_get_config(drv_data); - drv_data->lpss_base = drv_data->ioaddr + config->offset; + drv_data->lpss_base = drv_data->ssp->mmio_base + config->offset; /* Enable software chip select control */ value = __lpss_ssp_read_priv(drv_data, config->reg_cs_ctrl); @@ -1733,8 +1733,6 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) drv_data->ssp_type = ssp->type; - drv_data->ioaddr = ssp->mmio_base; - drv_data->ssdr_physical = ssp->phys_base + SSDR; if (pxa25x_ssp_comp(drv_data)) { switch (drv_data->ssp_type) { case QUARK_X1000_SSP: diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h index 1400472bc986c..ad9980ebefa65 100644 --- a/drivers/spi/spi-pxa2xx.h +++ b/drivers/spi/spi-pxa2xx.h @@ -33,10 +33,6 @@ struct driver_data { /* PXA hookup */ struct pxa2xx_spi_controller *controller_info; - /* SSP register addresses */ - void __iomem *ioaddr; - phys_addr_t ssdr_physical; - /* SSP masks*/ u32 dma_cr1; u32 int_cr1; @@ -87,16 +83,14 @@ struct chip_data { void (*cs_control)(u32 command); }; -static inline u32 pxa2xx_spi_read(const struct driver_data *drv_data, - unsigned reg) +static inline u32 pxa2xx_spi_read(const struct driver_data *drv_data, u32 reg) { - return __raw_readl(drv_data->ioaddr + reg); + return pxa_ssp_read_reg(drv_data->ssp, reg); } -static inline void pxa2xx_spi_write(const struct driver_data *drv_data, - unsigned reg, u32 val) +static inline void pxa2xx_spi_write(const struct driver_data *drv_data, u32 reg, u32 val) { - __raw_writel(val, drv_data->ioaddr + reg); + pxa_ssp_write_reg(drv_data->ssp, reg, val); } #define DMA_ALIGNMENT 8 -- GitLab From c3dce24c40cc7cd07deca5b81b763eae66f30856 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 23 Apr 2021 21:24:30 +0300 Subject: [PATCH 0222/3804] spi: pxa2xx: Utilize struct device from struct ssp_device We have a duplication of struct device in the struct driver_data, get rid of it and reuse member from struct ssp_device instead. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210423182441.50272-4-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-dma.c | 12 +++++------- drivers/spi/spi-pxa2xx-pci.c | 1 + drivers/spi/spi-pxa2xx.c | 12 +++++------- drivers/spi/spi-pxa2xx.h | 4 ---- 4 files changed, 11 insertions(+), 18 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c index 3b27f356a18f3..2e4a49567146c 100644 --- a/drivers/spi/spi-pxa2xx-dma.c +++ b/drivers/spi/spi-pxa2xx-dma.c @@ -111,7 +111,7 @@ pxa2xx_spi_dma_prepare_one(struct driver_data *drv_data, ret = dmaengine_slave_config(chan, &cfg); if (ret) { - dev_warn(&drv_data->pdev->dev, "DMA slave config failed\n"); + dev_warn(drv_data->ssp->dev, "DMA slave config failed\n"); return NULL; } @@ -125,7 +125,7 @@ irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data) status = pxa2xx_spi_read(drv_data, SSSR) & drv_data->mask_sr; if (status & SSSR_ROR) { - dev_err(&drv_data->pdev->dev, "FIFO overrun\n"); + dev_err(drv_data->ssp->dev, "FIFO overrun\n"); dmaengine_terminate_async(drv_data->controller->dma_rx); dmaengine_terminate_async(drv_data->controller->dma_tx); @@ -145,16 +145,14 @@ int pxa2xx_spi_dma_prepare(struct driver_data *drv_data, tx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_MEM_TO_DEV, xfer); if (!tx_desc) { - dev_err(&drv_data->pdev->dev, - "failed to get DMA TX descriptor\n"); + dev_err(drv_data->ssp->dev, "failed to get DMA TX descriptor\n"); err = -EBUSY; goto err_tx; } rx_desc = pxa2xx_spi_dma_prepare_one(drv_data, DMA_DEV_TO_MEM, xfer); if (!rx_desc) { - dev_err(&drv_data->pdev->dev, - "failed to get DMA RX descriptor\n"); + dev_err(drv_data->ssp->dev, "failed to get DMA RX descriptor\n"); err = -EBUSY; goto err_rx; } @@ -191,8 +189,8 @@ void pxa2xx_spi_dma_stop(struct driver_data *drv_data) int pxa2xx_spi_dma_setup(struct driver_data *drv_data) { struct pxa2xx_spi_controller *pdata = drv_data->controller_info; - struct device *dev = &drv_data->pdev->dev; struct spi_controller *controller = drv_data->controller; + struct device *dev = drv_data->ssp->dev; dma_cap_mask_t mask; dma_cap_zero(mask); diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 1833f5876e9fa..f588fad77fc09 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -239,6 +239,7 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, spi_pdata.dma_burst_size = c->dma_burst_size ? c->dma_burst_size : 1; ssp = &spi_pdata.ssp; + ssp->dev = &dev->dev; ssp->phys_base = pci_resource_start(dev, 0); ssp->mmio_base = pcim_iomap_table(dev)[0]; ssp->port_id = (c->port_id >= 0) ? c->port_id : dev->devfn; diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index d89db682179d0..0f3f7d7259374 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -628,7 +628,7 @@ static void int_error_stop(struct driver_data *drv_data, const char *msg) pxa2xx_spi_flush(drv_data); pxa2xx_spi_off(drv_data); - dev_err(&drv_data->pdev->dev, "%s\n", msg); + dev_err(drv_data->ssp->dev, "%s\n", msg); drv_data->controller->cur_msg->status = -EIO; spi_finalize_current_transfer(drv_data->controller); @@ -731,8 +731,7 @@ static void handle_bad_msg(struct driver_data *drv_data) pxa2xx_spi_write(drv_data, SSTO, 0); write_SSSR_CS(drv_data, drv_data->clear_sr); - dev_err(&drv_data->pdev->dev, - "bad message state in interrupt handler\n"); + dev_err(drv_data->ssp->dev, "bad message state in interrupt handler\n"); } static irqreturn_t ssp_int(int irq, void *dev_id) @@ -748,7 +747,7 @@ static irqreturn_t ssp_int(int irq, void *dev_id) * the IRQ was not for us (we shouldn't be RPM suspended when the * interrupt is enabled). */ - if (pm_runtime_suspended(&drv_data->pdev->dev)) + if (pm_runtime_suspended(drv_data->ssp->dev)) return IRQ_NONE; /* @@ -1158,7 +1157,7 @@ static int pxa2xx_spi_slave_abort(struct spi_controller *controller) pxa2xx_spi_flush(drv_data); pxa2xx_spi_off(drv_data); - dev_dbg(&drv_data->pdev->dev, "transfer aborted\n"); + dev_dbg(drv_data->ssp->dev, "transfer aborted\n"); drv_data->controller->cur_msg->status = -EINTR; spi_finalize_current_transfer(drv_data->controller); @@ -1645,7 +1644,7 @@ static int pxa2xx_spi_fw_translate_cs(struct spi_controller *controller, { struct driver_data *drv_data = spi_controller_get_devdata(controller); - if (has_acpi_companion(&drv_data->pdev->dev)) { + if (has_acpi_companion(drv_data->ssp->dev)) { switch (drv_data->ssp_type) { /* * For Atoms the ACPI DeviceSelection used by the Windows @@ -1711,7 +1710,6 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) drv_data = spi_controller_get_devdata(controller); drv_data->controller = controller; drv_data->controller_info = platform_info; - drv_data->pdev = pdev; drv_data->ssp = ssp; controller->dev.of_node = pdev->dev.of_node; diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h index ad9980ebefa65..6724d7e056ce6 100644 --- a/drivers/spi/spi-pxa2xx.h +++ b/drivers/spi/spi-pxa2xx.h @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -20,9 +19,6 @@ #include struct driver_data { - /* Driver model hookup */ - struct platform_device *pdev; - /* SSP Info */ struct ssp_device *ssp; -- GitLab From 0e4768713e71dd224633fd7e00ad358bc48f433a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 23 Apr 2021 21:24:31 +0300 Subject: [PATCH 0223/3804] spi: pxa2xx: Replace header inclusions by forward declarations When the data structure is only referred by pointer, compiler may not need to see the contents of the data type. Thus, we may replace header inclusions by respective forward declarations. Due to above add missed headers as well. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210423182441.50272-5-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-dma.c | 4 ++-- drivers/spi/spi-pxa2xx-pci.c | 1 + drivers/spi/spi-pxa2xx.c | 2 ++ drivers/spi/spi-pxa2xx.h | 18 ++++++++++-------- include/linux/spi/pxa2xx_spi.h | 2 ++ 5 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c index 2e4a49567146c..e00dbadd39ecb 100644 --- a/drivers/spi/spi-pxa2xx-dma.c +++ b/drivers/spi/spi-pxa2xx-dma.c @@ -9,11 +9,11 @@ #include #include #include -#include #include #include -#include + #include +#include #include "spi-pxa2xx.h" diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index f588fad77fc09..a259be12d3265 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -8,6 +8,7 @@ #include #include #include + #include #include diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 0f3f7d7259374..1d4c7f4217ede 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,7 @@ #include #include #include + #include #include diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h index 6724d7e056ce6..739e264feaa69 100644 --- a/drivers/spi/spi-pxa2xx.h +++ b/drivers/spi/spi-pxa2xx.h @@ -7,16 +7,18 @@ #ifndef SPI_PXA2XX_H #define SPI_PXA2XX_H -#include -#include -#include -#include #include -#include -#include +#include +#include #include -#include -#include + +#include + +struct gpio_desc; +struct pxa2xx_spi_controller; +struct spi_controller; +struct spi_device; +struct spi_transfer; struct driver_data { /* SSP Info */ diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index 31f00c7f4f59d..1e0e2f136319f 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -5,6 +5,8 @@ #ifndef __linux_pxa2xx_spi_h #define __linux_pxa2xx_spi_h +#include + #include #define PXA2XX_CS_ASSERT (0x01) -- GitLab From 5edc24901f4d469f8fc943004f73655933e89dbf Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 23 Apr 2021 21:24:32 +0300 Subject: [PATCH 0224/3804] spi: pxa2xx: Unify ifdeffery used in the headers The two headers have quite different ifdeffery to prevent multiple inclusion. Unify them with the pattern that in particular reflects their location. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210423182441.50272-6-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 6 +++--- include/linux/spi/pxa2xx_spi.h | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 7f73b26ed22e4..14b049840faff 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -11,8 +11,8 @@ * PXA3xx SSP1, SSP2, SSP3, SSP4 */ -#ifndef __LINUX_SSP_H -#define __LINUX_SSP_H +#ifndef __LINUX_PXA2XX_SSP_H +#define __LINUX_PXA2XX_SSP_H #include #include @@ -270,4 +270,4 @@ static inline struct ssp_device *pxa_ssp_request_of(const struct device_node *n, static inline void pxa_ssp_free(struct ssp_device *ssp) {} #endif -#endif +#endif /* __LINUX_PXA2XX_SSP_H */ diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index 1e0e2f136319f..12ef04d0896d2 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -2,8 +2,8 @@ /* * Copyright (C) 2005 Stephen Street / StreetFire Sound Labs */ -#ifndef __linux_pxa2xx_spi_h -#define __linux_pxa2xx_spi_h +#ifndef __LINUX_SPI_PXA2XX_SPI_H +#define __LINUX_SPI_PXA2XX_SPI_H #include @@ -51,4 +51,5 @@ struct pxa2xx_spi_chip { extern void pxa2xx_set_spi_info(unsigned id, struct pxa2xx_spi_controller *info); #endif -#endif + +#endif /* __LINUX_SPI_PXA2XX_SPI_H */ -- GitLab From 1beb37b0e3f98708bfb37778049764b4500756da Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 23 Apr 2021 21:24:33 +0300 Subject: [PATCH 0225/3804] spi: pxa2xx: Group Intel Quark specific definitions DDS_RATE is Intel Quark specific definition. Move it to the rest Intel Quark related. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210423182441.50272-7-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 14b049840faff..1b6c1a0922bd2 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -38,7 +38,6 @@ struct device_node; #define SSDR (0x10) /* SSP Data Write/Data Read Register */ #define SSTO (0x28) /* SSP Time Out Register */ -#define DDS_RATE (0x28) /* SSP DDS Clock Rate Register (Intel Quark) */ #define SSPSP (0x2C) /* SSP Programmable Serial Protocol */ #define SSTSA (0x30) /* SSP Tx Timeslot Active */ #define SSRSA (0x34) /* SSP Rx Timeslot Active */ @@ -105,6 +104,9 @@ struct device_node; #define CE4100_SSCR1_RFT GENMASK(11, 10) /* Receive FIFO Threshold (mask) */ #define CE4100_SSCR1_RxTresh(x) (((x) - 1) << 10) /* level [1..4] */ +/* Intel Quark X1000 */ +#define DDS_RATE 0x28 /* SSP DDS Clock Rate Register */ + /* QUARK_X1000 SSCR0 bit definition */ #define QUARK_X1000_SSCR0_DSS GENMASK(4, 0) /* Data Size Select (mask) */ #define QUARK_X1000_SSCR0_DataSize(x) ((x) - 1) /* Data Size Select [4..32] */ -- GitLab From 026a1dc1af52742c5897e64a3431445371a71871 Mon Sep 17 00:00:00 2001 From: Jay Fang Date: Thu, 6 May 2021 15:08:08 +0800 Subject: [PATCH 0226/3804] spi: spi-topcliff-pch: Fix potential double free in pch_spi_process_messages() pch_spi_set_tx() frees data->pkt_tx_buff on failure of kzalloc() for data->pkt_rx_buff, but its caller, pch_spi_process_messages(), will free data->pkt_tx_buff again. Set data->pkt_tx_buff to NULL after kfree() to avoid double free. Signed-off-by: Jay Fang Link: https://lore.kernel.org/r/1620284888-65215-1-git-send-email-f.fangjian@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-topcliff-pch.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-topcliff-pch.c b/drivers/spi/spi-topcliff-pch.c index b8870784fc6ef..8c4615b763398 100644 --- a/drivers/spi/spi-topcliff-pch.c +++ b/drivers/spi/spi-topcliff-pch.c @@ -580,8 +580,10 @@ static void pch_spi_set_tx(struct pch_spi_data *data, int *bpw) data->pkt_tx_buff = kzalloc(size, GFP_KERNEL); if (data->pkt_tx_buff != NULL) { data->pkt_rx_buff = kzalloc(size, GFP_KERNEL); - if (!data->pkt_rx_buff) + if (!data->pkt_rx_buff) { kfree(data->pkt_tx_buff); + data->pkt_tx_buff = NULL; + } } if (!data->pkt_rx_buff) { -- GitLab From 029d32a892a860017d33ff8d9598259731e776ad Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 6 May 2021 13:52:59 +0200 Subject: [PATCH 0227/3804] spi: dw-apb-ssi: Integrate Renesas RZ/N1 SPI controller Originally, the Renesas RZ/N1 SPI Controller DT bindings were not integrated in the main DT bindings for the Synopsys DesignWare Synchronous Serial Interface, but in its own file, as the RZ/N1 controller has additional registers for software CS control and DMA. As so far DMA is not supported on RZ/N1, and json-schema can handle any possible differences fine, integrate the RZ/N1 compatible values in the main DT bindings for the Synopsys DW SSI. Signed-off-by: Geert Uytterhoeven Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/aef15aa119ed02487ded4691141678bc1040c3b4.1620301936.git.geert+renesas@glider.be Signed-off-by: Mark Brown --- .../devicetree/bindings/spi/renesas,rzn1-spi.txt | 11 ----------- .../devicetree/bindings/spi/snps,dw-apb-ssi.yaml | 6 ++++++ 2 files changed, 6 insertions(+), 11 deletions(-) delete mode 100644 Documentation/devicetree/bindings/spi/renesas,rzn1-spi.txt diff --git a/Documentation/devicetree/bindings/spi/renesas,rzn1-spi.txt b/Documentation/devicetree/bindings/spi/renesas,rzn1-spi.txt deleted file mode 100644 index fb1a6728638d3..0000000000000 --- a/Documentation/devicetree/bindings/spi/renesas,rzn1-spi.txt +++ /dev/null @@ -1,11 +0,0 @@ -Renesas RZ/N1 SPI Controller - -This controller is based on the Synopsys DW Synchronous Serial Interface and -inherits all properties defined in snps,dw-apb-ssi.txt except for the -compatible property. - -Required properties: -- compatible : The device specific string followed by the generic RZ/N1 string. - Therefore it must be one of: - "renesas,r9a06g032-spi", "renesas,rzn1-spi" - "renesas,r9a06g033-spi", "renesas,rzn1-spi" diff --git a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml index 4825157cd92e8..ca91201a99269 100644 --- a/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml +++ b/Documentation/devicetree/bindings/spi/snps,dw-apb-ssi.yaml @@ -67,6 +67,12 @@ properties: const: baikal,bt1-sys-ssi - description: Canaan Kendryte K210 SoS SPI Controller const: canaan,k210-spi + - description: Renesas RZ/N1 SPI Controller + items: + - enum: + - renesas,r9a06g032-spi # RZ/N1D + - renesas,r9a06g033-spi # RZ/N1S + - const: renesas,rzn1-spi # RZ/N1 reg: minItems: 1 -- GitLab From e7a1a3abea373e41ba7dfe0fbc93cb79b6a3a529 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Thu, 29 Apr 2021 19:20:48 +0800 Subject: [PATCH 0228/3804] spi: omap-100k: Fix the length judgment problem word_len should be checked in the omap1_spi100k_setup_transfer function to see if it exceeds 32. Signed-off-by: Tian Tao Link: https://lore.kernel.org/r/1619695248-39045-1-git-send-email-tiantao6@hisilicon.com Signed-off-by: Mark Brown --- drivers/spi/spi-omap-100k.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-omap-100k.c b/drivers/spi/spi-omap-100k.c index 7062f29022539..f104470605b38 100644 --- a/drivers/spi/spi-omap-100k.c +++ b/drivers/spi/spi-omap-100k.c @@ -241,7 +241,7 @@ static int omap1_spi100k_setup_transfer(struct spi_device *spi, else word_len = spi->bits_per_word; - if (spi->bits_per_word > 32) + if (word_len > 32) return -EINVAL; cs->word_len = word_len; -- GitLab From 86b1d8ecb5f1f271a660ce0b882658447f85904a Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 7 May 2021 17:07:59 +0200 Subject: [PATCH 0229/3804] spi: tegra114: Fix an error message 'ret' is known to be 0 here. No error code is available, so just remove it from the error message. Fixes: f333a331ad ("spi/tegra114: add spi driver") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/e2593974c9484b7055177ad0c9237c8e343946be.1620399829.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- drivers/spi/spi-tegra114.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/spi/spi-tegra114.c b/drivers/spi/spi-tegra114.c index a2e5907276e7f..5131141bbf0d4 100644 --- a/drivers/spi/spi-tegra114.c +++ b/drivers/spi/spi-tegra114.c @@ -1071,8 +1071,7 @@ static int tegra_spi_transfer_one_message(struct spi_master *master, ret = wait_for_completion_timeout(&tspi->xfer_completion, SPI_DMA_TIMEOUT); if (WARN_ON(ret == 0)) { - dev_err(tspi->dev, - "spi transfer timeout, err %d\n", ret); + dev_err(tspi->dev, "spi transfer timeout\n"); if (tspi->is_curr_dma_xfer && (tspi->cur_direction & DATA_DIR_TX)) dmaengine_terminate_all(tspi->tx_dma_chan); -- GitLab From 665a990fdbea66a4d2af0287420f8266631be2ab Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Fri, 7 May 2021 18:26:39 +0200 Subject: [PATCH 0230/3804] spi: tegra210-quad: Fix an error message 'ret' is known to be 0 here. No error code is available, so just remove it from the error message. Fixes: 921fc1838fb0 ("spi: tegra210-quad: Add support for Tegra210 QSPI controller") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/b990c1bb5830196142c3d70e3e3c6c0245a7e75f.1620404705.git.christophe.jaillet@wanadoo.fr Signed-off-by: Mark Brown --- drivers/spi/spi-tegra210-quad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-tegra210-quad.c b/drivers/spi/spi-tegra210-quad.c index 2f806f4b2c34e..2354ca1e38581 100644 --- a/drivers/spi/spi-tegra210-quad.c +++ b/drivers/spi/spi-tegra210-quad.c @@ -1028,7 +1028,7 @@ static int tegra_qspi_transfer_one_message(struct spi_master *master, struct spi ret = wait_for_completion_timeout(&tqspi->xfer_completion, QSPI_DMA_TIMEOUT); if (WARN_ON(ret == 0)) { - dev_err(tqspi->dev, "transfer timeout: %d\n", ret); + dev_err(tqspi->dev, "transfer timeout\n"); if (tqspi->is_curr_dma_xfer && (tqspi->cur_direction & DATA_DIR_TX)) dmaengine_terminate_all(tqspi->tx_dma_chan); if (tqspi->is_curr_dma_xfer && (tqspi->cur_direction & DATA_DIR_RX)) -- GitLab From 14b6cff54edaca5740068e9ed070152727ed7718 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 22 Apr 2021 17:26:19 +0200 Subject: [PATCH 0231/3804] staging: rtl8723bs: avoid bogus gcc warning gcc gets confused by some of the type casts and produces an apparently senseless warning about an out-of-bound memcpy to an unrelated array in the same structure: drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c: In function 'rtw_cfg80211_ap_set_encryption': cc1: error: writing 8 bytes into a region of size 0 [-Werror=stringop-overflow=] In file included from drivers/staging/rtl8723bs/include/drv_types.h:32, from drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c:10: drivers/staging/rtl8723bs/include/rtw_security.h:98:15: note: at offset [184, 4264] into destination object 'dot11AuthAlgrthm' of size 4 98 | u32 dot11AuthAlgrthm; /* 802.11 auth, could be open, shared, 8021x and authswitch */ | ^~~~~~~~~~~~~~~~ cc1: error: writing 8 bytes into a region of size 0 [-Werror=stringop-overflow=] drivers/staging/rtl8723bs/include/rtw_security.h:98:15: note: at offset [264, 4344] into destination object 'dot11AuthAlgrthm' of size 4 This is a known gcc bug, and the patch here is only a workaround, but the approach of using a temporary variable to hold a pointer to the key also improves readability in addition to avoiding the warning, so overall this should still help. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99673 Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20210422152648.2891996-1-arnd@kernel.org Cc: stable Signed-off-by: Greg Kroah-Hartman --- .../staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 23 +++++++++++-------- .../staging/rtl8723bs/os_dep/ioctl_linux.c | 21 +++++++++-------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index c1dac6eec59f5..a6d731e959a28 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -527,6 +527,9 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct security_priv *psecuritypriv = &(padapter->securitypriv); struct sta_priv *pstapriv = &padapter->stapriv; + char *grpkey = padapter->securitypriv.dot118021XGrpKey[param->u.crypt.idx].skey; + char *txkey = padapter->securitypriv.dot118021XGrptxmickey[param->u.crypt.idx].skey; + char *rxkey = padapter->securitypriv.dot118021XGrprxmickey[param->u.crypt.idx].skey; param->u.crypt.err = 0; param->u.crypt.alg[IEEE_CRYPT_ALG_NAME_LEN - 1] = '\0'; @@ -609,7 +612,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { if (strcmp(param->u.crypt.alg, "WEP") == 0) { - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); psecuritypriv->dot118021XGrpPrivacy = _WEP40_; if (param->u.crypt.key_len == 13) @@ -622,12 +625,12 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { psecuritypriv->dot118021XGrpPrivacy = _TKIP_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); /* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */ /* set mic key */ - memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8); - memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8); + memcpy(txkey, &(param->u.crypt.key[16]), 8); + memcpy(rxkey, &(param->u.crypt.key[24]), 8); psecuritypriv->busetkipkey = true; @@ -636,7 +639,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { psecuritypriv->dot118021XGrpPrivacy = _AES_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); } else { @@ -713,7 +716,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { if (strcmp(param->u.crypt.alg, "WEP") == 0) { - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); psecuritypriv->dot118021XGrpPrivacy = _WEP40_; if (param->u.crypt.key_len == 13) @@ -725,12 +728,12 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { psecuritypriv->dot118021XGrpPrivacy = _TKIP_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); /* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */ /* set mic key */ - memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8); - memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8); + memcpy(txkey, &(param->u.crypt.key[16]), 8); + memcpy(rxkey, &(param->u.crypt.key[24]), 8); psecuritypriv->busetkipkey = true; @@ -739,7 +742,7 @@ static int rtw_cfg80211_ap_set_encryption(struct net_device *dev, struct ieee_pa { psecuritypriv->dot118021XGrpPrivacy = _AES_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); } else { diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c index e98e5388d5c7b..5088c3731b6df 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_linux.c @@ -2963,6 +2963,9 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, struct mlme_priv *pmlmepriv = &padapter->mlmepriv; struct security_priv *psecuritypriv = &(padapter->securitypriv); struct sta_priv *pstapriv = &padapter->stapriv; + char *txkey = padapter->securitypriv.dot118021XGrptxmickey[param->u.crypt.idx].skey; + char *rxkey = padapter->securitypriv.dot118021XGrprxmickey[param->u.crypt.idx].skey; + char *grpkey = psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey; param->u.crypt.err = 0; param->u.crypt.alg[IEEE_CRYPT_ALG_NAME_LEN - 1] = '\0'; @@ -3064,7 +3067,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, if (!psta && check_fwstate(pmlmepriv, WIFI_AP_STATE)) { /* group key */ if (param->u.crypt.set_tx == 1) { if (strcmp(param->u.crypt.alg, "WEP") == 0) { - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); psecuritypriv->dot118021XGrpPrivacy = _WEP40_; if (param->u.crypt.key_len == 13) @@ -3073,11 +3076,11 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, } else if (strcmp(param->u.crypt.alg, "TKIP") == 0) { psecuritypriv->dot118021XGrpPrivacy = _TKIP_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); /* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */ /* set mic key */ - memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8); + memcpy(txkey, &(param->u.crypt.key[16]), 8); memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8); psecuritypriv->busetkipkey = true; @@ -3086,7 +3089,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, else if (strcmp(param->u.crypt.alg, "CCMP") == 0) { psecuritypriv->dot118021XGrpPrivacy = _AES_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); } else { psecuritypriv->dot118021XGrpPrivacy = _NO_PRIVACY_; } @@ -3142,7 +3145,7 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, } else { /* group key??? */ if (strcmp(param->u.crypt.alg, "WEP") == 0) { - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); psecuritypriv->dot118021XGrpPrivacy = _WEP40_; if (param->u.crypt.key_len == 13) @@ -3150,19 +3153,19 @@ static int rtw_set_encryption(struct net_device *dev, struct ieee_param *param, } else if (strcmp(param->u.crypt.alg, "TKIP") == 0) { psecuritypriv->dot118021XGrpPrivacy = _TKIP_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); /* DEBUG_ERR("set key length :param->u.crypt.key_len =%d\n", param->u.crypt.key_len); */ /* set mic key */ - memcpy(psecuritypriv->dot118021XGrptxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[16]), 8); - memcpy(psecuritypriv->dot118021XGrprxmickey[param->u.crypt.idx].skey, &(param->u.crypt.key[24]), 8); + memcpy(txkey, &(param->u.crypt.key[16]), 8); + memcpy(rxkey, &(param->u.crypt.key[24]), 8); psecuritypriv->busetkipkey = true; } else if (strcmp(param->u.crypt.alg, "CCMP") == 0) { psecuritypriv->dot118021XGrpPrivacy = _AES_; - memcpy(psecuritypriv->dot118021XGrpKey[param->u.crypt.idx].skey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); + memcpy(grpkey, param->u.crypt.key, (param->u.crypt.key_len > 16 ? 16 : param->u.crypt.key_len)); } else { psecuritypriv->dot118021XGrpPrivacy = _NO_PRIVACY_; } -- GitLab From cabb1bb60e88ccaaa122ba01862403cd44e8e8f8 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 26 Apr 2021 19:55:58 +0200 Subject: [PATCH 0232/3804] mmc: meson-gx: make replace WARN_ONCE with dev_warn_once about scatterlist offset alignment Some drivers like ath10k can sometimg give an sg buffer with an offset whose alignment is not compatible with the Amlogic DMA descriptor engine requirements. Simply replace with dev_warn_once() to inform user this should be fixed to avoid degraded performance. This should be ultimately fixed in ath10k, but since it's only a performance issue the warning should be removed. Fixes: 79ed05e329c3 ("mmc: meson-gx: add support for descriptor chain mode") Cc: stable@vger.kernel.org Reported-by: Christian Hewitt Signed-off-by: Neil Armstrong Acked-by: Martin Blumenstingl Link: https://lore.kernel.org/r/20210426175559.3110575-1-narmstrong@baylibre.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-gx-mmc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index b8b771b643cc8..1c61f0f24c09b 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -258,7 +258,9 @@ static void meson_mmc_get_transfer_mode(struct mmc_host *mmc, for_each_sg(data->sg, sg, data->sg_len, i) { /* check for 8 byte alignment */ if (sg->offset % 8) { - WARN_ONCE(1, "unaligned scatterlist buffer\n"); + dev_warn_once(mmc_dev(mmc), + "unaligned sg offset %u, disabling descriptor DMA for transfer\n", + sg->offset); return; } } -- GitLab From 9b81354d7ebc1fd17f666a168dcabf27dae290bd Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 26 Apr 2021 19:55:59 +0200 Subject: [PATCH 0233/3804] mmc: meson-gx: also check SD_IO_RW_EXTENDED for scatterlist size alignment The brcmfmac driver can generate a scatterlist from a skb with each packets not aligned to the block size. This is not supported by the Amlogic Descriptor dma engine where each descriptor must match a multiple of the block size. The sg list is valid, since the sum of the sg buffers is a multiple of the block size, but we must discard those when in SD_IO_RW_EXTENDED mode since SDIO block mode can be used under the hood even with data->blocks == 1. Those transfers are very rare, thus can be replaced by a bounce buffer without real performance loss. Fixes: 7412dee9f1fd ("mmc: meson-gx: replace WARN_ONCE with dev_warn_once about scatterlist size alignment in block mode") Cc: stable@vger.kernel.org Reported-by: Christian Hewitt Signed-off-by: Neil Armstrong Link: https://lore.kernel.org/r/20210426175559.3110575-2-narmstrong@baylibre.com Signed-off-by: Ulf Hansson --- drivers/mmc/host/meson-gx-mmc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index 1c61f0f24c09b..016a6106151a5 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -236,7 +236,8 @@ static void meson_mmc_get_transfer_mode(struct mmc_host *mmc, if (host->dram_access_quirk) return; - if (data->blocks > 1) { + /* SD_IO_RW_EXTENDED (CMD53) can also use block mode under the hood */ + if (data->blocks > 1 || mrq->cmd->opcode == SD_IO_RW_EXTENDED) { /* * In block mode DMA descriptor format, "length" field indicates * number of blocks and there is no way to pass DMA size that -- GitLab From a1149a6c06ee094a6e62886b0c0e8e66967a728a Mon Sep 17 00:00:00 2001 From: Daniel Beer Date: Sat, 24 Apr 2021 20:16:52 +1200 Subject: [PATCH 0234/3804] mmc: sdhci-pci-gli: increase 1.8V regulator wait Inserting an SD-card on an Intel NUC10i3FNK4 (which contains a GL9755) results in the message: mmc0: 1.8V regulator output did not become stable Following this message, some cards work (sometimes), but most cards fail with EILSEQ. This behaviour is observed on Debian 10 running kernel 4.19.188, but also with 5.8.18 and 5.11.15. The driver currently waits 5ms after switching on the 1.8V regulator for it to become stable. Increasing this to 10ms gets rid of the warning about stability, but most cards still fail. Increasing it to 20ms gets some cards working (a 32GB Samsung micro SD works, a 128GB ADATA doesn't). At 50ms, the ADATA works most of the time, and at 100ms both cards work reliably. Signed-off-by: Daniel Beer Acked-by: Ben Chuang Fixes: e51df6ce668a ("mmc: host: sdhci-pci: Add Genesys Logic GL975x support") Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210424081652.GA16047@nyquist.nev Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-pci-gli.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-pci-gli.c b/drivers/mmc/host/sdhci-pci-gli.c index 592d79082f58c..061618aa247f5 100644 --- a/drivers/mmc/host/sdhci-pci-gli.c +++ b/drivers/mmc/host/sdhci-pci-gli.c @@ -627,8 +627,13 @@ static void sdhci_gli_voltage_switch(struct sdhci_host *host) * * Wait 5ms after set 1.8V signal enable in Host Control 2 register * to ensure 1.8V signal enable bit is set by GL9750/GL9755. + * + * ...however, the controller in the NUC10i3FNK4 (a 9755) requires + * slightly longer than 5ms before the control register reports that + * 1.8V is ready, and far longer still before the card will actually + * work reliably. */ - usleep_range(5000, 5500); + usleep_range(100000, 110000); } static void sdhci_gl9750_reset(struct sdhci_host *host, u8 mask) -- GitLab From 18abf874367456540846319574864e6ff32752e2 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 26 Apr 2021 11:26:22 +0200 Subject: [PATCH 0235/3804] cdc-wdm: untangle a circular dependency between callback and softint We have a cycle of callbacks scheduling works which submit URBs with those callbacks. This needs to be blocked, stopped and unblocked to untangle the circle. Signed-off-by: Oliver Neukum Link: https://lore.kernel.org/r/20210426092622.20433-1-oneukum@suse.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-wdm.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index 508b1c3f8b731..d1e4a7379bebd 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -321,12 +321,23 @@ exit: } -static void kill_urbs(struct wdm_device *desc) +static void poison_urbs(struct wdm_device *desc) { /* the order here is essential */ - usb_kill_urb(desc->command); - usb_kill_urb(desc->validity); - usb_kill_urb(desc->response); + usb_poison_urb(desc->command); + usb_poison_urb(desc->validity); + usb_poison_urb(desc->response); +} + +static void unpoison_urbs(struct wdm_device *desc) +{ + /* + * the order here is not essential + * it is symmetrical just to be nice + */ + usb_unpoison_urb(desc->response); + usb_unpoison_urb(desc->validity); + usb_unpoison_urb(desc->command); } static void free_urbs(struct wdm_device *desc) @@ -741,11 +752,12 @@ static int wdm_release(struct inode *inode, struct file *file) if (!desc->count) { if (!test_bit(WDM_DISCONNECTING, &desc->flags)) { dev_dbg(&desc->intf->dev, "wdm_release: cleanup\n"); - kill_urbs(desc); + poison_urbs(desc); spin_lock_irq(&desc->iuspin); desc->resp_count = 0; spin_unlock_irq(&desc->iuspin); desc->manage_power(desc->intf, 0); + unpoison_urbs(desc); } else { /* must avoid dev_printk here as desc->intf is invalid */ pr_debug(KBUILD_MODNAME " %s: device gone - cleaning up\n", __func__); @@ -1037,9 +1049,9 @@ static void wdm_disconnect(struct usb_interface *intf) wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); + poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); - kill_urbs(desc); mutex_unlock(&desc->wlock); mutex_unlock(&desc->rlock); @@ -1080,9 +1092,10 @@ static int wdm_suspend(struct usb_interface *intf, pm_message_t message) set_bit(WDM_SUSPENDING, &desc->flags); spin_unlock_irq(&desc->iuspin); /* callback submits work - order is essential */ - kill_urbs(desc); + poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); + unpoison_urbs(desc); } if (!PMSG_IS_AUTO(message)) { mutex_unlock(&desc->wlock); @@ -1140,7 +1153,7 @@ static int wdm_pre_reset(struct usb_interface *intf) wake_up_all(&desc->wait); mutex_lock(&desc->rlock); mutex_lock(&desc->wlock); - kill_urbs(desc); + poison_urbs(desc); cancel_work_sync(&desc->rxwork); cancel_work_sync(&desc->service_outs_intr); return 0; @@ -1151,6 +1164,7 @@ static int wdm_post_reset(struct usb_interface *intf) struct wdm_device *desc = wdm_find_device(intf); int rv; + unpoison_urbs(desc); clear_bit(WDM_OVERFLOW, &desc->flags); clear_bit(WDM_RESETTING, &desc->flags); rv = recover_from_urb_loss(desc); -- GitLab From 04357fafea9c7ed34525eb9680c760245c3bb958 Mon Sep 17 00:00:00 2001 From: Ferry Toth Date: Sun, 25 Apr 2021 17:09:47 +0200 Subject: [PATCH 0236/3804] usb: dwc3: pci: Enable usb2-gadget-lpm-disable for Intel Merrifield On Intel Merrifield LPM is causing host to reset port after a timeout. By disabling LPM entirely this is prevented. Fixes: 066c09593454 ("usb: dwc3: pci: Enable extcon driver for Intel Merrifield") Reviewed-by: Andy Shevchenko Signed-off-by: Ferry Toth Cc: stable Link: https://lore.kernel.org/r/20210425150947.5862-1-ftoth@exalondelft.nl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index e7b932dcbf820..1e51460938b83 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -123,6 +123,7 @@ static const struct property_entry dwc3_pci_mrfld_properties[] = { PROPERTY_ENTRY_STRING("linux,extcon-name", "mrfld_bcove_pwrsrc"), PROPERTY_ENTRY_BOOL("snps,dis_u3_susphy_quirk"), PROPERTY_ENTRY_BOOL("snps,dis_u2_susphy_quirk"), + PROPERTY_ENTRY_BOOL("snps,usb2-gadget-lpm-disable"), PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"), {} }; -- GitLab From 9cbc7eb17cdf6d1adaa2aebfe0079077d31d39a9 Mon Sep 17 00:00:00 2001 From: Thinh Nguyen Date: Mon, 26 Apr 2021 14:08:40 -0700 Subject: [PATCH 0237/3804] usb: dwc3: core: Add missing GHWPARAMS9 doc Add missing documentation for struct dwc3_hwparams new field hwparams9 to avoid kernel doc build warning. Fixes: 16710380d3aa ("usb: dwc3: Capture new capability register GHWPARAMS9") Reported-by: Stephen Rothwell Acked-by: Felipe Balbi Signed-off-by: Thinh Nguyen Link: https://lore.kernel.org/r/f4c491f7614e623755fafe640b7e690e7c5634e2.1619471127.git.Thinh.Nguyen@synopsys.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index b1e875c58f20f..3859d8cad3cb4 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -850,6 +850,7 @@ struct dwc3_trb { * @hwparams6: GHWPARAMS6 * @hwparams7: GHWPARAMS7 * @hwparams8: GHWPARAMS8 + * @hwparams9: GHWPARAMS9 */ struct dwc3_hwparams { u32 hwparams0; -- GitLab From 6c05cdbb9ef1de0264cac9135f6e90dad1e8763f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sun, 25 Apr 2021 12:32:53 -0300 Subject: [PATCH 0238/3804] usb: Restore the reference to ch9.h Keep the textual reference to ch9.h as it was prior to commit caa93d9bd2d7 ("usb: Fix up movement of USB core kerneldoc location"). As linux/usb/ch9.h does not contain comments anymore, explain that drivers/usb/common/common.c includes such header and provides declarations of a few utilities routines for manipulating the data types from ch9.h. Also mention that drivers/usb/common/debug.c contains some functions for creating debug output. Fixes: caa93d9bd2d7 ("usb: Fix up movement of USB core kerneldoc location") Reported-by: Alan Stern Suggested-by: Alan Stern Acked-by: Alan Stern Signed-off-by: Fabio Estevam Link: https://lore.kernel.org/r/20210425153253.2542816-1-festevam@gmail.com Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-api/usb/usb.rst | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Documentation/driver-api/usb/usb.rst b/Documentation/driver-api/usb/usb.rst index 543e70434da22..820e867af45ab 100644 --- a/Documentation/driver-api/usb/usb.rst +++ b/Documentation/driver-api/usb/usb.rst @@ -109,16 +109,19 @@ well as to make sure they aren't relying on some HCD-specific behavior. USB-Standard Types ================== -In ``drivers/usb/common/common.c`` and ``drivers/usb/common/debug.c`` you -will find the USB data types defined in chapter 9 of the USB specification. -These data types are used throughout USB, and in APIs including this host -side API, gadget APIs, usb character devices and debugfs interfaces. +In ``include/uapi/linux/usb/ch9.h`` you will find the USB data types defined +in chapter 9 of the USB specification. These data types are used throughout +USB, and in APIs including this host side API, gadget APIs, usb character +devices and debugfs interfaces. That file is itself included by +``include/linux/usb/ch9.h``, which also contains declarations of a few +utility routines for manipulating these data types; the implementations +are in ``drivers/usb/common/common.c``. .. kernel-doc:: drivers/usb/common/common.c :export: -.. kernel-doc:: drivers/usb/common/debug.c - :export: +In addition, some functions useful for creating debugging output are +defined in ``drivers/usb/common/debug.c``. Host-Side Data Types and Macros =============================== -- GitLab From d1d90dd27254c44d087ad3f8b5b3e4fff0571f45 Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Wed, 28 Apr 2021 02:01:10 -0700 Subject: [PATCH 0239/3804] usb: dwc3: gadget: Enable suspend events commit 72704f876f50 ("dwc3: gadget: Implement the suspend entry event handler") introduced (nearly 5 years ago!) an interrupt handler for U3/L1-L2 suspend events. The problem is that these events aren't currently enabled in the DEVTEN register so the handler is never even invoked. Fix this simply by enabling the corresponding bit in dwc3_gadget_enable_irq() using the same revision check as found in the handler. Fixes: 72704f876f50 ("dwc3: gadget: Implement the suspend entry event handler") Acked-by: Felipe Balbi Signed-off-by: Jack Pham Cc: stable Link: https://lore.kernel.org/r/20210428090111.3370-1-jackp@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index dd80e5ca8c78b..cab3a91840689 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2323,6 +2323,10 @@ static void dwc3_gadget_enable_irq(struct dwc3 *dwc) if (DWC3_VER_IS_PRIOR(DWC3, 250A)) reg |= DWC3_DEVTEN_ULSTCNGEN; + /* On 2.30a and above this bit enables U3/L2-L1 Suspend Events */ + if (!DWC3_VER_IS_PRIOR(DWC3, 230A)) + reg |= DWC3_DEVTEN_EOPFEN; + dwc3_writel(dwc->regs, DWC3_DEVTEN, reg); } -- GitLab From 6f26ebb79a84bcad211cb2d8a2ef74dfc427322d Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Wed, 28 Apr 2021 02:01:11 -0700 Subject: [PATCH 0240/3804] usb: dwc3: gadget: Rename EOPF event macros to Suspend The device event corresponding to End of Periodic Frame is only found on older IP revisions (2.10a and prior, according to a cursory SNPS databook search). On revisions 2.30a and newer, including DWC3.1, the same event value and corresponding DEVTEN bit were repurposed to indicate that the link has gone into suspend state (U3 or L2/L1). EOPF events had never been enabled before in this driver, and going forward we expect current and future DWC3-based devices won't likely to be using such old DWC3 IP revisions either. Hence rather than keeping the deprecated EOPF macro names let's rename them to indicate their usage for suspend events. Acked-by: Felipe Balbi Signed-off-by: Jack Pham Link: https://lore.kernel.org/r/20210428090111.3370-2-jackp@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.h | 6 +++--- drivers/usb/dwc3/debug.h | 8 ++++---- drivers/usb/dwc3/gadget.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 3859d8cad3cb4..c5d5760cdf53e 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -57,7 +57,7 @@ #define DWC3_DEVICE_EVENT_LINK_STATUS_CHANGE 3 #define DWC3_DEVICE_EVENT_WAKEUP 4 #define DWC3_DEVICE_EVENT_HIBER_REQ 5 -#define DWC3_DEVICE_EVENT_EOPF 6 +#define DWC3_DEVICE_EVENT_SUSPEND 6 #define DWC3_DEVICE_EVENT_SOF 7 #define DWC3_DEVICE_EVENT_ERRATIC_ERROR 9 #define DWC3_DEVICE_EVENT_CMD_CMPL 10 @@ -460,7 +460,7 @@ #define DWC3_DEVTEN_CMDCMPLTEN BIT(10) #define DWC3_DEVTEN_ERRTICERREN BIT(9) #define DWC3_DEVTEN_SOFEN BIT(7) -#define DWC3_DEVTEN_EOPFEN BIT(6) +#define DWC3_DEVTEN_U3L2L1SUSPEN BIT(6) #define DWC3_DEVTEN_HIBERNATIONREQEVTEN BIT(5) #define DWC3_DEVTEN_WKUPEVTEN BIT(4) #define DWC3_DEVTEN_ULSTCNGEN BIT(3) @@ -1375,7 +1375,7 @@ struct dwc3_event_depevt { * 3 - ULStChng * 4 - WkUpEvt * 5 - Reserved - * 6 - EOPF + * 6 - Suspend (EOPF on revisions 2.10a and prior) * 7 - SOF * 8 - Reserved * 9 - ErrticErr diff --git a/drivers/usb/dwc3/debug.h b/drivers/usb/dwc3/debug.h index db231de46bb35..d0ac89c5b3172 100644 --- a/drivers/usb/dwc3/debug.h +++ b/drivers/usb/dwc3/debug.h @@ -221,8 +221,8 @@ static inline const char *dwc3_gadget_event_string(char *str, size_t size, snprintf(str, size, "WakeUp [%s]", dwc3_gadget_link_string(state)); break; - case DWC3_DEVICE_EVENT_EOPF: - snprintf(str, size, "End-Of-Frame [%s]", + case DWC3_DEVICE_EVENT_SUSPEND: + snprintf(str, size, "Suspend [%s]", dwc3_gadget_link_string(state)); break; case DWC3_DEVICE_EVENT_SOF: @@ -353,8 +353,8 @@ static inline const char *dwc3_gadget_event_type_string(u8 event) return "Wake-Up"; case DWC3_DEVICE_EVENT_HIBER_REQ: return "Hibernation"; - case DWC3_DEVICE_EVENT_EOPF: - return "End of Periodic Frame"; + case DWC3_DEVICE_EVENT_SUSPEND: + return "Suspend"; case DWC3_DEVICE_EVENT_SOF: return "Start of Frame"; case DWC3_DEVICE_EVENT_ERRATIC_ERROR: diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index cab3a91840689..6eab78f8a1a78 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2325,7 +2325,7 @@ static void dwc3_gadget_enable_irq(struct dwc3 *dwc) /* On 2.30a and above this bit enables U3/L2-L1 Suspend Events */ if (!DWC3_VER_IS_PRIOR(DWC3, 230A)) - reg |= DWC3_DEVTEN_EOPFEN; + reg |= DWC3_DEVTEN_U3L2L1SUSPEN; dwc3_writel(dwc->regs, DWC3_DEVTEN, reg); } @@ -3744,7 +3744,7 @@ static void dwc3_gadget_interrupt(struct dwc3 *dwc, case DWC3_DEVICE_EVENT_LINK_STATUS_CHANGE: dwc3_gadget_linksts_change_interrupt(dwc, event->event_info); break; - case DWC3_DEVICE_EVENT_EOPF: + case DWC3_DEVICE_EVENT_SUSPEND: /* It changed to be suspend event for version 2.30a and above */ if (!DWC3_VER_IS_PRIOR(DWC3, 230A)) { /* -- GitLab From 75a41ce46bae6cbe7d3bb2584eb844291d642874 Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Thu, 6 May 2021 12:22:00 +0100 Subject: [PATCH 0241/3804] usb: dwc2: Fix gadget DMA unmap direction The dwc2 gadget support maps and unmaps DMA buffers as necessary. When mapping and unmapping it uses the direction of the endpoint to select the direction of the DMA transfer, but this fails for Control OUT transfers because the unmap occurs after the endpoint direction has been reversed for the status phase. A possible solution would be to unmap the buffer before the direction is changed, but a safer, less invasive fix is to remember the buffer direction independently of the endpoint direction. Fixes: fe0b94abcdf6 ("usb: dwc2: gadget: manage ep0 state in software") Acked-by: Minas Harutyunyan Cc: stable Signed-off-by: Phil Elwell Link: https://lore.kernel.org/r/20210506112200.2893922-1-phil@raspberrypi.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/core.h | 2 ++ drivers/usb/dwc2/gadget.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index da5ac4a4595b6..ab6b815e0089c 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -113,6 +113,7 @@ struct dwc2_hsotg_req; * @debugfs: File entry for debugfs file for this endpoint. * @dir_in: Set to true if this endpoint is of the IN direction, which * means that it is sending data to the Host. + * @map_dir: Set to the value of dir_in when the DMA buffer is mapped. * @index: The index for the endpoint registers. * @mc: Multi Count - number of transactions per microframe * @interval: Interval for periodic endpoints, in frames or microframes. @@ -162,6 +163,7 @@ struct dwc2_hsotg_ep { unsigned short fifo_index; unsigned char dir_in; + unsigned char map_dir; unsigned char index; unsigned char mc; u16 interval; diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index e6bb1bdb27603..184964174dc0c 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -422,7 +422,7 @@ static void dwc2_hsotg_unmap_dma(struct dwc2_hsotg *hsotg, { struct usb_request *req = &hs_req->req; - usb_gadget_unmap_request(&hsotg->gadget, req, hs_ep->dir_in); + usb_gadget_unmap_request(&hsotg->gadget, req, hs_ep->map_dir); } /* @@ -1242,6 +1242,7 @@ static int dwc2_hsotg_map_dma(struct dwc2_hsotg *hsotg, { int ret; + hs_ep->map_dir = hs_ep->dir_in; ret = usb_gadget_map_request(&hsotg->gadget, req, hs_ep->dir_in); if (ret) goto dma_error; -- GitLab From bb9c74a5bd1462499fe5ccb1e3c5ac40dcfa9139 Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Sat, 1 May 2021 02:35:58 -0700 Subject: [PATCH 0242/3804] usb: dwc3: gadget: Free gadget structure only after freeing endpoints As part of commit e81a7018d93a ("usb: dwc3: allocate gadget structure dynamically") the dwc3_gadget_release() was added which will free the dwc->gadget structure upon the device's removal when usb_del_gadget_udc() is called in dwc3_gadget_exit(). However, simply freeing the gadget results a dangling pointer situation: the endpoints created in dwc3_gadget_init_endpoints() have their dep->endpoint.ep_list members chained off the list_head anchored at dwc->gadget->ep_list. Thus when dwc->gadget is freed, the first dwc3_ep in the list now has a dangling prev pointer and likewise for the next pointer of the dwc3_ep at the tail of the list. The dwc3_gadget_free_endpoints() that follows will result in a use-after-free when it calls list_del(). This was caught by enabling KASAN and performing a driver unbind. The recent commit 568262bf5492 ("usb: dwc3: core: Add shutdown callback for dwc3") also exposes this as a panic during shutdown. There are a few possibilities to fix this. One could be to perform a list_del() of the gadget->ep_list itself which removes it from the rest of the dwc3_ep chain. Another approach is what this patch does, by splitting up the usb_del_gadget_udc() call into its separate "del" and "put" components. This allows dwc3_gadget_free_endpoints() to be called before the gadget is finally freed with usb_put_gadget(). Fixes: e81a7018d93a ("usb: dwc3: allocate gadget structure dynamically") Reviewed-by: Peter Chen Signed-off-by: Jack Pham Link: https://lore.kernel.org/r/20210501093558.7375-1-jackp@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 6eab78f8a1a78..dd1342403bb24 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -4062,8 +4062,9 @@ err0: void dwc3_gadget_exit(struct dwc3 *dwc) { - usb_del_gadget_udc(dwc->gadget); + usb_del_gadget(dwc->gadget); dwc3_gadget_free_endpoints(dwc); + usb_put_gadget(dwc->gadget); dma_free_coherent(dwc->sysdev, DWC3_BOUNCE_SIZE, dwc->bounce, dwc->bounce_addr); kfree(dwc->setup_buf); -- GitLab From 18ffa988dbae69cc6e9949cddd9606f6fe533894 Mon Sep 17 00:00:00 2001 From: Wesley Cheng Date: Fri, 7 May 2021 10:55:19 -0700 Subject: [PATCH 0243/3804] usb: dwc3: gadget: Return success always for kick transfer in ep queue If an error is received when issuing a start or update transfer command, the error handler will stop all active requests (including the current USB request), and call dwc3_gadget_giveback() to notify function drivers of the requests which have been stopped. Avoid returning an error for kick transfer during EP queue, to remove duplicate cleanup operations on the request being queued. Fixes: 8d99087c2db8 ("usb: dwc3: gadget: Properly handle failed kick_transfer") cc: stable@vger.kernel.org Signed-off-by: Wesley Cheng Link: https://lore.kernel.org/r/1620410119-24971-1-git-send-email-wcheng@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index dd1342403bb24..49ca5da5e2794 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1684,7 +1684,9 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) } } - return __dwc3_gadget_kick_transfer(dep); + __dwc3_gadget_kick_transfer(dep); + + return 0; } static int dwc3_gadget_ep_queue(struct usb_ep *ep, struct usb_request *request, -- GitLab From b96992081fde19806b5beb5b25f9327820ead77b Mon Sep 17 00:00:00 2001 From: Li Jun Date: Fri, 30 Apr 2021 14:57:16 +0800 Subject: [PATCH 0244/3804] usb: dwc3: imx8mp: detect dwc3 core node via compatible string New schema of usb controller DT-node should be named with prefix "^usb(@.*)?", dt changed the node name, but missed counter part change in driver, fix it by switching to use compatible string as the dwc3 core compatible string keeps "snps,dwc3" in all dt. Fixes: d1689cd3c0f4 ("arm64: dts: imx8mp: Use the correct name for child node "snps, dwc3"") Acked-by: Felipe Balbi Signed-off-by: Li Jun Link: https://lore.kernel.org/r/1619765836-20387-1-git-send-email-jun.li@nxp.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-imx8mp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/dwc3-imx8mp.c b/drivers/usb/dwc3/dwc3-imx8mp.c index b13cfab89d532..e9fced6f7a7c9 100644 --- a/drivers/usb/dwc3/dwc3-imx8mp.c +++ b/drivers/usb/dwc3/dwc3-imx8mp.c @@ -165,7 +165,7 @@ static int dwc3_imx8mp_probe(struct platform_device *pdev) if (err < 0) goto disable_rpm; - dwc3_np = of_get_child_by_name(node, "dwc3"); + dwc3_np = of_get_compatible_child(node, "snps,dwc3"); if (!dwc3_np) { dev_err(dev, "failed to find dwc3 core child\n"); goto disable_rpm; -- GitLab From 0b2b149e918f6dddb4ea53615551bf7bc131f875 Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 8 May 2021 09:53:10 +0800 Subject: [PATCH 0245/3804] usb: dwc3: imx8mp: fix error return code in dwc3_imx8mp_probe() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 6dd2565989b4 ("usb: dwc3: add imx8mp dwc3 glue layer driver") Reported-by: Hulk Robot Acked-by: Felipe Balbi Signed-off-by: Zhen Lei Cc: stable Link: https://lore.kernel.org/r/20210508015310.1627-1-thunder.leizhen@huawei.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-imx8mp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-imx8mp.c b/drivers/usb/dwc3/dwc3-imx8mp.c index e9fced6f7a7c9..756faa46d33a7 100644 --- a/drivers/usb/dwc3/dwc3-imx8mp.c +++ b/drivers/usb/dwc3/dwc3-imx8mp.c @@ -167,6 +167,7 @@ static int dwc3_imx8mp_probe(struct platform_device *pdev) dwc3_np = of_get_compatible_child(node, "snps,dwc3"); if (!dwc3_np) { + err = -ENODEV; dev_err(dev, "failed to find dwc3 core child\n"); goto disable_rpm; } -- GitLab From e89baeba4f64bab679618b3330cdcda5929fb8d5 Mon Sep 17 00:00:00 2001 From: Matthijs Kooijman Date: Mon, 3 May 2021 20:05:38 +0200 Subject: [PATCH 0246/3804] usb: dwc2: Remove obsolete MODULE_ constants from platform.c Originally, the core and platform drivers were separate modules, so each had its own module info. Since commit 2d1165a4b95e (usb: dwc2: remove dwc2_platform.ko) platform.c is included in the core module, which now contains duplicate module info (from core.c and platform.c). Due to the linking order and modinfo implementation, running `modinfo` on the resulting dwc2.ko shows just the info from platform.c, rather than that from core.c, suggesting that I am the author of the entire dwc2 module. Since platform.c is just a minor part of the entire module, this removes its module info in favor of the info from core.c. Acked-by: Minas Harutyunyan Signed-off-by: Matthijs Kooijman Link: https://lore.kernel.org/r/20210503180538.64423-1-matthijs@stdin.nl Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc2/platform.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 3024785d84cb8..520a0beef77ca 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -776,7 +776,3 @@ static struct platform_driver dwc2_platform_driver = { }; module_platform_driver(dwc2_platform_driver); - -MODULE_DESCRIPTION("DESIGNWARE HS OTG Platform Glue"); -MODULE_AUTHOR("Matthijs Kooijman "); -MODULE_LICENSE("Dual BSD/GPL"); -- GitLab From 2e2b8d15adc2f6ab2d4aa0550e241b9742a436a0 Mon Sep 17 00:00:00 2001 From: Kyle Tso Date: Tue, 4 May 2021 01:18:49 +0800 Subject: [PATCH 0247/3804] usb: typec: tcpm: Fix wrong handling in GET_SINK_CAP After receiving Sink Capabilities Message in GET_SINK_CAP AMS, it is incorrect to call tcpm_pd_handle_state because the Message is expected and the current state is not Ready states. The result of this incorrect operation ends in Soft Reset which is definitely wrong. Simply forwarding to Ready States is enough to finish the AMS. Fixes: 8dea75e11380 ("usb: typec: tcpm: Protocol Error handling") Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Kyle Tso Cc: stable Link: https://lore.kernel.org/r/20210503171849.2605302-1-kyletso@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index c4fdc00a3bc8f..68e04e397e924 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -2390,7 +2390,7 @@ static void tcpm_pd_data_request(struct tcpm_port *port, port->nr_sink_caps = cnt; port->sink_cap_done = true; if (port->ams == GET_SINK_CAPABILITIES) - tcpm_pd_handle_state(port, ready_state(port), NONE_AMS, 0); + tcpm_set_state(port, ready_state(port), 0); /* Unexpected Sink Capabilities */ else tcpm_pd_handle_msg(port, -- GitLab From 8edb79af88efc6e49e735f9baf61d9f0748b881f Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Wed, 7 Apr 2021 11:49:27 +0800 Subject: [PATCH 0248/3804] iio: light: gp2ap002: Fix rumtime PM imbalance on error When devm_request_threaded_irq() fails, we should decrease the runtime PM counter to keep the counter balanced. But when iio_device_register() fails, we need not to decrease it because we have already decreased it before. Signed-off-by: Dinghao Liu Reviewed-by: Linus Walleij Fixes: 97d642e23037 ("iio: light: Add a driver for Sharp GP2AP002x00F") Link: https://lore.kernel.org/r/20210407034927.16882-1-dinghao.liu@zju.edu.cn Signed-off-by: Jonathan Cameron --- drivers/iio/light/gp2ap002.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iio/light/gp2ap002.c b/drivers/iio/light/gp2ap002.c index d048ae257c519..f960be7d40016 100644 --- a/drivers/iio/light/gp2ap002.c +++ b/drivers/iio/light/gp2ap002.c @@ -582,7 +582,7 @@ static int gp2ap002_probe(struct i2c_client *client, "gp2ap002", indio_dev); if (ret) { dev_err(dev, "unable to request IRQ\n"); - goto out_disable_vio; + goto out_put_pm; } gp2ap002->irq = client->irq; @@ -612,8 +612,9 @@ static int gp2ap002_probe(struct i2c_client *client, return 0; -out_disable_pm: +out_put_pm: pm_runtime_put_noidle(dev); +out_disable_pm: pm_runtime_disable(dev); out_disable_vio: regulator_disable(gp2ap002->vio); -- GitLab From a2fa9242e89f27696515699fe0f0296bf1ac1815 Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 12 Apr 2021 13:32:02 +0800 Subject: [PATCH 0249/3804] iio: proximity: pulsedlight: Fix rumtime PM imbalance on error When lidar_write_control() fails, a pairing PM usage counter decrement is needed to keep the counter balanced. Fixes: 4ac4e086fd8c5 ("iio: pulsedlight-lidar-lite: add runtime PM") Signed-off-by: Dinghao Liu Reviewed-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210412053204.4889-1-dinghao.liu@zju.edu.cn Signed-off-by: Jonathan Cameron --- drivers/iio/proximity/pulsedlight-lidar-lite-v2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c index c685f10b5ae48..cc206bfa09c78 100644 --- a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c +++ b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c @@ -160,6 +160,7 @@ static int lidar_get_measurement(struct lidar_data *data, u16 *reg) ret = lidar_write_control(data, LIDAR_REG_CONTROL_ACQUIRE); if (ret < 0) { dev_err(&client->dev, "cannot send start measurement command"); + pm_runtime_put_noidle(&client->dev); return ret; } -- GitLab From 7061803522ee7876df1ca18cdd1e1551f761352d Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Wed, 14 Apr 2021 11:49:55 +0300 Subject: [PATCH 0250/3804] iio: hid-sensors: select IIO_TRIGGERED_BUFFER under HID_SENSOR_IIO_TRIGGER During commit 067fda1c065ff ("iio: hid-sensors: move triggered buffer setup into hid_sensor_setup_trigger"), the iio_triggered_buffer_{setup,cleanup}() functions got moved under the hid-sensor-trigger module. The above change works fine, if any of the sensors get built. However, when only the common hid-sensor-trigger module gets built (and none of the drivers), then the IIO_TRIGGERED_BUFFER symbol isn't selected/enforced. Previously, each driver would enforce/select the IIO_TRIGGERED_BUFFER symbol. With this change the HID_SENSOR_IIO_TRIGGER (for the hid-sensor-trigger module) will enforce that IIO_TRIGGERED_BUFFER gets selected. All HID sensor drivers select the HID_SENSOR_IIO_TRIGGER symbol. So, this change removes the IIO_TRIGGERED_BUFFER enforcement from each driver. Fixes: 067fda1c065ff ("iio: hid-sensors: move triggered buffer setup into hid_sensor_setup_trigger") Reported-by: Thomas Deutschmann Cc: Srinivas Pandruvada Signed-off-by: Alexandru Ardelean Acked-by: Srinivas Pandruvada Link: https://lore.kernel.org/r/20210414084955.260117-1-aardelean@deviqon.com Signed-off-by: Jonathan Cameron --- drivers/iio/accel/Kconfig | 1 - drivers/iio/common/hid-sensors/Kconfig | 1 + drivers/iio/gyro/Kconfig | 1 - drivers/iio/humidity/Kconfig | 1 - drivers/iio/light/Kconfig | 2 -- drivers/iio/magnetometer/Kconfig | 1 - drivers/iio/orientation/Kconfig | 2 -- drivers/iio/pressure/Kconfig | 1 - drivers/iio/temperature/Kconfig | 1 - 9 files changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index cceda3cecbcf4..8b1723635cce5 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -229,7 +229,6 @@ config DMARD10 config HID_SENSOR_ACCEL_3D depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID Accelerometers 3D" diff --git a/drivers/iio/common/hid-sensors/Kconfig b/drivers/iio/common/hid-sensors/Kconfig index 24d4925673363..2a3dd3b907bee 100644 --- a/drivers/iio/common/hid-sensors/Kconfig +++ b/drivers/iio/common/hid-sensors/Kconfig @@ -19,6 +19,7 @@ config HID_SENSOR_IIO_TRIGGER tristate "Common module (trigger) for all HID Sensor IIO drivers" depends on HID_SENSOR_HUB && HID_SENSOR_IIO_COMMON && IIO_BUFFER select IIO_TRIGGER + select IIO_TRIGGERED_BUFFER help Say yes here to build trigger support for HID sensors. Triggers will be send if all requested attributes were read. diff --git a/drivers/iio/gyro/Kconfig b/drivers/iio/gyro/Kconfig index 5824f2edf9758..20b5ac7ab66af 100644 --- a/drivers/iio/gyro/Kconfig +++ b/drivers/iio/gyro/Kconfig @@ -111,7 +111,6 @@ config FXAS21002C_SPI config HID_SENSOR_GYRO_3D depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID Gyroscope 3D" diff --git a/drivers/iio/humidity/Kconfig b/drivers/iio/humidity/Kconfig index 6549fcf6db698..2de5494e7c225 100644 --- a/drivers/iio/humidity/Kconfig +++ b/drivers/iio/humidity/Kconfig @@ -52,7 +52,6 @@ config HID_SENSOR_HUMIDITY tristate "HID Environmental humidity sensor" depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER help diff --git a/drivers/iio/light/Kconfig b/drivers/iio/light/Kconfig index 33ad4dd0b5c7b..917f9becf9c75 100644 --- a/drivers/iio/light/Kconfig +++ b/drivers/iio/light/Kconfig @@ -256,7 +256,6 @@ config ISL29125 config HID_SENSOR_ALS depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID ALS" @@ -270,7 +269,6 @@ config HID_SENSOR_ALS config HID_SENSOR_PROX depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID PROX" diff --git a/drivers/iio/magnetometer/Kconfig b/drivers/iio/magnetometer/Kconfig index 5d4ffd66032e9..74ad5701c6c29 100644 --- a/drivers/iio/magnetometer/Kconfig +++ b/drivers/iio/magnetometer/Kconfig @@ -95,7 +95,6 @@ config MAG3110 config HID_SENSOR_MAGNETOMETER_3D depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID Magenetometer 3D" diff --git a/drivers/iio/orientation/Kconfig b/drivers/iio/orientation/Kconfig index a505583cc2fda..396cbbb867f4c 100644 --- a/drivers/iio/orientation/Kconfig +++ b/drivers/iio/orientation/Kconfig @@ -9,7 +9,6 @@ menu "Inclinometer sensors" config HID_SENSOR_INCLINOMETER_3D depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID Inclinometer 3D" @@ -20,7 +19,6 @@ config HID_SENSOR_INCLINOMETER_3D config HID_SENSOR_DEVICE_ROTATION depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID Device Rotation" diff --git a/drivers/iio/pressure/Kconfig b/drivers/iio/pressure/Kconfig index 689b978db4f95..fc0d3cfca4186 100644 --- a/drivers/iio/pressure/Kconfig +++ b/drivers/iio/pressure/Kconfig @@ -79,7 +79,6 @@ config DPS310 config HID_SENSOR_PRESS depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER tristate "HID PRESS" diff --git a/drivers/iio/temperature/Kconfig b/drivers/iio/temperature/Kconfig index f1f2a1499c9e2..4df60082c1fa8 100644 --- a/drivers/iio/temperature/Kconfig +++ b/drivers/iio/temperature/Kconfig @@ -45,7 +45,6 @@ config HID_SENSOR_TEMP tristate "HID Environmental temperature sensor" depends on HID_SENSOR_HUB select IIO_BUFFER - select IIO_TRIGGERED_BUFFER select HID_SENSOR_IIO_COMMON select HID_SENSOR_IIO_TRIGGER help -- GitLab From f73c730774d88a14d7b60feee6d0e13570f99499 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 23 Apr 2021 05:09:59 +0300 Subject: [PATCH 0251/3804] iio: gyro: mpu3050: Fix reported temperature value The raw temperature value is a 16-bit signed integer. The sign casting is missing in the code, which results in a wrong temperature reported by userspace tools, fix it. Cc: stable@vger.kernel.org Fixes: 3904b28efb2c ("iio: gyro: Add driver for the MPU-3050 gyroscope") Datasheet: https://www.cdiweb.com/datasheets/invensense/mpu-3000a.pdf Tested-by: Maxim Schwalm # Asus TF700T Tested-by: Svyatoslav Ryhel # Asus TF201 Reported-by: Svyatoslav Ryhel Reviewed-by: Andy Shevchenko Reviewed-by: Linus Walleij Signed-off-by: Dmitry Osipenko Acked-by: Jean-Baptiste Maneyrol Link: https://lore.kernel.org/r/20210423020959.5023-1-digetx@gmail.com Signed-off-by: Jonathan Cameron --- drivers/iio/gyro/mpu3050-core.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c index ac90be03332af..f17a935195352 100644 --- a/drivers/iio/gyro/mpu3050-core.c +++ b/drivers/iio/gyro/mpu3050-core.c @@ -272,7 +272,16 @@ static int mpu3050_read_raw(struct iio_dev *indio_dev, case IIO_CHAN_INFO_OFFSET: switch (chan->type) { case IIO_TEMP: - /* The temperature scaling is (x+23000)/280 Celsius */ + /* + * The temperature scaling is (x+23000)/280 Celsius + * for the "best fit straight line" temperature range + * of -30C..85C. The 23000 includes room temperature + * offset of +35C, 280 is the precision scale and x is + * the 16-bit signed integer reported by hardware. + * + * Temperature value itself represents temperature of + * the sensor die. + */ *val = 23000; return IIO_VAL_INT; default: @@ -329,7 +338,7 @@ static int mpu3050_read_raw(struct iio_dev *indio_dev, goto out_read_raw_unlock; } - *val = be16_to_cpu(raw_val); + *val = (s16)be16_to_cpu(raw_val); ret = IIO_VAL_INT; goto out_read_raw_unlock; -- GitLab From 901f84de0e16bde10a72d7eb2f2eb73fcde8fa1a Mon Sep 17 00:00:00 2001 From: Tomasz Duszynski Date: Fri, 23 Apr 2021 10:02:44 +0200 Subject: [PATCH 0252/3804] iio: core: fix ioctl handlers removal Currently ioctl handlers are removed twice. For the first time during iio_device_unregister() then later on inside iio_device_unregister_eventset() and iio_buffers_free_sysfs_and_mask(). Double free leads to kernel panic. Fix this by not touching ioctl handlers list directly but rather letting code responsible for registration call the matching cleanup routine itself. Fixes: 8dedcc3eee3ac ("iio: core: centralize ioctl() calls to the main chardev") Signed-off-by: Tomasz Duszynski Acked-by: Alexandru Ardelean Cc: Link: https://lore.kernel.org/r/20210423080244.2790-1-tomasz.duszynski@octakon.com Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index d92c58a94fe4f..9e59f5da3d280 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1926,9 +1926,6 @@ EXPORT_SYMBOL(__iio_device_register); **/ void iio_device_unregister(struct iio_dev *indio_dev) { - struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev); - struct iio_ioctl_handler *h, *t; - cdev_device_del(&indio_dev->chrdev, &indio_dev->dev); mutex_lock(&indio_dev->info_exist_lock); @@ -1939,9 +1936,6 @@ void iio_device_unregister(struct iio_dev *indio_dev) indio_dev->info = NULL; - list_for_each_entry_safe(h, t, &iio_dev_opaque->ioctl_handlers, entry) - list_del(&h->entry); - iio_device_wakeup_eventset(indio_dev); iio_buffer_wakeup_poll(indio_dev); -- GitLab From af0670b0bf1b116fd729b1b1011cf814bc34e12e Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Mon, 3 May 2021 17:43:50 +0300 Subject: [PATCH 0253/3804] iio: core: return ENODEV if ioctl is unknown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the ioctl() mechanism was introduced in IIO core to centralize the registration of all ioctls in one place via commit 8dedcc3eee3ac ("iio: core: centralize ioctl() calls to the main chardev"), the return code was changed from ENODEV to EINVAL, when the ioctl code isn't known. This was done by accident. This change reverts back to the old behavior, where if the ioctl() code isn't known, ENODEV is returned (vs EINVAL). This was brought into perspective by this patch: https://lore.kernel.org/linux-iio/20210428150815.136150-1-paul@crapouillou.net/ Fixes: 8dedcc3eee3ac ("iio: core: centralize ioctl() calls to the main chardev") Signed-off-by: Alexandru Ardelean Reviewed-by: Nuno Sá Tested-by: Paul Cercueil Reviewed-by: Linus Walleij Signed-off-by: Jonathan Cameron --- drivers/iio/industrialio-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index 9e59f5da3d280..59efb36db2c7c 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -1778,7 +1778,6 @@ static long iio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (!indio_dev->info) goto out_unlock; - ret = -EINVAL; list_for_each_entry(h, &iio_dev_opaque->ioctl_handlers, entry) { ret = h->ioctl(indio_dev, filp, cmd, arg); if (ret != IIO_IOCTL_UNHANDLED) @@ -1786,7 +1785,7 @@ static long iio_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) } if (ret == IIO_IOCTL_UNHANDLED) - ret = -EINVAL; + ret = -ENODEV; out_unlock: mutex_unlock(&indio_dev->info_exist_lock); -- GitLab From af0e1871d79cfbb91f732d2c6fa7558e45c31038 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 7 May 2021 19:30:41 +0100 Subject: [PATCH 0254/3804] iio: tsl2583: Fix division by a zero lux_val The lux_val returned from tsl2583_get_lux can potentially be zero, so check for this to avoid a division by zero and an overflowed gain_trim_val. Fixes clang scan-build warning: drivers/iio/light/tsl2583.c:345:40: warning: Either the condition 'lux_val<0' is redundant or there is division by zero at line 345. [zerodivcond] Fixes: ac4f6eee8fe8 ("staging: iio: TAOS tsl258x: Device driver") Signed-off-by: Colin Ian King Cc: Signed-off-by: Jonathan Cameron --- drivers/iio/light/tsl2583.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/iio/light/tsl2583.c b/drivers/iio/light/tsl2583.c index 0f787bfc88fc4..c9d8f07a6fcdd 100644 --- a/drivers/iio/light/tsl2583.c +++ b/drivers/iio/light/tsl2583.c @@ -341,6 +341,14 @@ static int tsl2583_als_calibrate(struct iio_dev *indio_dev) return lux_val; } + /* Avoid division by zero of lux_value later on */ + if (lux_val == 0) { + dev_err(&chip->client->dev, + "%s: lux_val of 0 will produce out of range trim_value\n", + __func__); + return -ENODATA; + } + gain_trim_val = (unsigned int)(((chip->als_settings.als_cal_target) * chip->als_settings.als_gain_trim) / lux_val); if ((gain_trim_val < 250) || (gain_trim_val > 4000)) { -- GitLab From b9a0866a5bdf6a4643a52872ada6be6184c6f4f2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 5 May 2021 01:23:37 +0300 Subject: [PATCH 0255/3804] usb: typec: ucsi: Put fwnode in any case during ->probe() device_for_each_child_node() bumps a reference counting of a returned variable. We have to balance it whenever we return to the caller. Fixes: c1b0bc2dabfa ("usb: typec: Add support for UCSI interface") Cc: Heikki Krogerus Reviewed-by: Heikki Krogerus Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210504222337.3151726-1-andy.shevchenko@gmail.com Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 282c3c825c136..0e1cec346e0f8 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -999,6 +999,7 @@ static const struct typec_operations ucsi_ops = { .pr_set = ucsi_pr_swap }; +/* Caller must call fwnode_handle_put() after use */ static struct fwnode_handle *ucsi_find_fwnode(struct ucsi_connector *con) { struct fwnode_handle *fwnode; @@ -1033,7 +1034,7 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) command |= UCSI_CONNECTOR_NUMBER(con->num); ret = ucsi_send_command(ucsi, command, &con->cap, sizeof(con->cap)); if (ret < 0) - goto out; + goto out_unlock; if (con->cap.op_mode & UCSI_CONCAP_OPMODE_DRP) cap->data = TYPEC_PORT_DRD; @@ -1151,6 +1152,8 @@ static int ucsi_register_port(struct ucsi *ucsi, int index) trace_ucsi_register_port(con->num, &con->status); out: + fwnode_handle_put(cap->fwnode); +out_unlock: mutex_unlock(&con->lock); return ret; } -- GitLab From e17b02d4970913233d543c79c9c66e72cac05bdd Mon Sep 17 00:00:00 2001 From: Marcel Hamer Date: Tue, 27 Apr 2021 14:21:18 +0200 Subject: [PATCH 0256/3804] usb: dwc3: omap: improve extcon initialization When extcon is used in combination with dwc3, it is assumed that the dwc3 registers are untouched and as such are only configured if VBUS is valid or ID is tied to ground. In case VBUS is not valid or ID is floating, the registers are not configured as such during driver initialization, causing a wrong default state during boot. If the registers are not in a default state, because they are for instance touched by a boot loader, this can cause for a kernel error. Signed-off-by: Marcel Hamer Link: https://lore.kernel.org/r/20210427122118.1948340-1-marcel@solidxs.se Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-omap.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-omap.c b/drivers/usb/dwc3/dwc3-omap.c index 3db17806e92e7..e196673f5c647 100644 --- a/drivers/usb/dwc3/dwc3-omap.c +++ b/drivers/usb/dwc3/dwc3-omap.c @@ -437,8 +437,13 @@ static int dwc3_omap_extcon_register(struct dwc3_omap *omap) if (extcon_get_state(edev, EXTCON_USB) == true) dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_VALID); + else + dwc3_omap_set_mailbox(omap, OMAP_DWC3_VBUS_OFF); + if (extcon_get_state(edev, EXTCON_USB_HOST) == true) dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_GROUND); + else + dwc3_omap_set_mailbox(omap, OMAP_DWC3_ID_FLOAT); omap->edev = edev; } -- GitLab From f75297853470627c4ee4e2b80eed40af7441c96b Mon Sep 17 00:00:00 2001 From: Wei Ming Chen Date: Thu, 6 May 2021 20:20:20 +0800 Subject: [PATCH 0257/3804] docs: usb: function: Modify path name Original path does not exists, so changed to "Documentation/ABI/testing/configfs-usb-gadget" Signed-off-by: Wei Ming Chen Link: https://lore.kernel.org/r/20210506122020.7117-1-jj251510319013@gmail.com Signed-off-by: Greg Kroah-Hartman --- Documentation/usb/gadget_configfs.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/usb/gadget_configfs.rst b/Documentation/usb/gadget_configfs.rst index 158e48dab586d..e4566ffb223f2 100644 --- a/Documentation/usb/gadget_configfs.rst +++ b/Documentation/usb/gadget_configfs.rst @@ -140,7 +140,7 @@ is an arbitrary string allowed in a filesystem, e.g.:: Each function provides its specific set of attributes, with either read-only or read-write access. Where applicable they need to be written to as appropriate. -Please refer to Documentation/ABI/*/configfs-usb-gadget* for more information. +Please refer to Documentation/ABI/testing/configfs-usb-gadget for more information. 4. Associating the functions with their configurations ------------------------------------------------------ -- GitLab From a60a34366e0d09ca002c966dd7c43a68c28b1f82 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 6 May 2021 22:39:10 +0200 Subject: [PATCH 0258/3804] usb: fotg210-hcd: Fix an error message 'retval' is known to be -ENODEV here. This is a hard-coded default error code which is not useful in the error message. Moreover, another error message is printed at the end of the error handling path. The corresponding error code (-ENOMEM) is more informative. So remove simplify the first error message. While at it, also remove the useless initialization of 'retval'. Fixes: 7d50195f6c50 ("usb: host: Faraday fotg210-hcd driver") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/94531bcff98e46d4f9c20183a90b7f47f699126c.1620333419.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/fotg210-hcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/host/fotg210-hcd.c index 6cac642520fc8..9c2eda0918e13 100644 --- a/drivers/usb/host/fotg210-hcd.c +++ b/drivers/usb/host/fotg210-hcd.c @@ -5568,7 +5568,7 @@ static int fotg210_hcd_probe(struct platform_device *pdev) struct usb_hcd *hcd; struct resource *res; int irq; - int retval = -ENODEV; + int retval; struct fotg210_hcd *fotg210; if (usb_disabled()) @@ -5588,7 +5588,7 @@ static int fotg210_hcd_probe(struct platform_device *pdev) hcd = usb_create_hcd(&fotg210_fotg210_hc_driver, dev, dev_name(dev)); if (!hcd) { - dev_err(dev, "failed to create hcd with err %d\n", retval); + dev_err(dev, "failed to create hcd\n"); retval = -ENOMEM; goto fail_create_hcd; } -- GitLab From be1c2bb3ba5a39c20b1d54e01ffbcb2b1ca7e46c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 27 Apr 2021 09:00:28 +0100 Subject: [PATCH 0259/3804] ARM: PXA: Fix cplds irqdesc allocation when using legacy mode The Mainstone PXA platform uses CONFIG_SPARSE_IRQ, and thus we cannot rely on the irq descriptors to be readilly allocated before creating the irqdomain in legacy mode. The kernel then complains loudly about not being able to associate the interrupt in the domain -- can't blame it. Fix it by allocating the irqdescs upfront in the legacy case. Fixes: b68761da0111 ("ARM: PXA: Kill use of irq_create_strict_mappings()") Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210426223942.GA213931@roeck-us.net --- arch/arm/mach-pxa/pxa_cplds_irqs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-pxa/pxa_cplds_irqs.c b/arch/arm/mach-pxa/pxa_cplds_irqs.c index ec0d9b094744d..bddfc7cd5d40f 100644 --- a/arch/arm/mach-pxa/pxa_cplds_irqs.c +++ b/arch/arm/mach-pxa/pxa_cplds_irqs.c @@ -121,8 +121,13 @@ static int cplds_probe(struct platform_device *pdev) return fpga->irq; base_irq = platform_get_irq(pdev, 1); - if (base_irq < 0) + if (base_irq < 0) { base_irq = 0; + } else { + ret = devm_irq_alloc_descs(&pdev->dev, base_irq, base_irq, CPLDS_NB_IRQ, 0); + if (ret < 0) + return ret; + } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); fpga->base = devm_ioremap_resource(&pdev->dev, res); -- GitLab From 5b44955dc19808fa209444ccb192343050e95ab0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 13 Apr 2021 14:21:58 +0200 Subject: [PATCH 0260/3804] irqchip/apple-aic: APPLE_AIC should depend on ARCH_APPLE The Apple Interrupt Controller is only present on Apple Silicon SoCs. Hence add a dependency on ARCH_APPLE, to prevent asking the user about this driver when configuring a kernel without Apple Silicon SoC support. Drop the default, as ARCH_APPLE already selects APPLE_AIC. Fixes: 76cde26394114f6a ("irqchip/apple-aic: Add support for the Apple Interrupt Controller") Signed-off-by: Geert Uytterhoeven Acked-by: Hector Martin Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/f37e8daea37d50651d2164b0b3dad90780188548.1618316398.git.geert+renesas@glider.be --- drivers/irqchip/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index b90e825df7e14..62543a4eccc08 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -596,7 +596,7 @@ config IRQ_IDT3243X config APPLE_AIC bool "Apple Interrupt Controller (AIC)" depends on ARM64 - default ARCH_APPLE + depends on ARCH_APPLE || COMPILE_TEST help Support for the Apple Interrupt Controller found on Apple Silicon SoCs, such as the M1. -- GitLab From 726c945ab2ebd104631b6105ab455a5bc604a3f1 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Tue, 27 Apr 2021 12:42:19 +0800 Subject: [PATCH 0261/3804] hwmon: (corsair-psu) Remove unneeded semicolons Fix the following coccicheck warning: ./drivers/hwmon/corsair-psu.c:379:2-3: Unneeded semicolon Remove unneeded semicolons. Signed-off-by: Wan Jiabing Link: https://lore.kernel.org/r/20210427044219.7799-1-wanjiabing@vivo.com Signed-off-by: Guenter Roeck --- drivers/hwmon/corsair-psu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/corsair-psu.c b/drivers/hwmon/corsair-psu.c index 3a5807e4a2efb..02298b86b57b6 100644 --- a/drivers/hwmon/corsair-psu.c +++ b/drivers/hwmon/corsair-psu.c @@ -355,7 +355,7 @@ static umode_t corsairpsu_hwmon_power_is_visible(const struct corsairpsu_data *p return 0444; default: return 0; - }; + } } static umode_t corsairpsu_hwmon_in_is_visible(const struct corsairpsu_data *priv, u32 attr, @@ -376,7 +376,7 @@ static umode_t corsairpsu_hwmon_in_is_visible(const struct corsairpsu_data *priv break; default: break; - }; + } return res; } -- GitLab From 5216dff22dc2bbbbe6f00335f9fd2879670e753b Mon Sep 17 00:00:00 2001 From: Eddie James Date: Thu, 29 Apr 2021 10:13:36 -0500 Subject: [PATCH 0262/3804] hwmon: (occ) Fix poll rate limiting The poll rate limiter time was initialized at zero. This breaks the comparison in time_after if jiffies is large. Switch to storing the next update time rather than the previous time, and initialize the time when the device is probed. Fixes: c10e753d43eb ("hwmon (occ): Add sensor types and versions") Signed-off-by: Eddie James Link: https://lore.kernel.org/r/20210429151336.18980-1-eajames@linux.ibm.com Signed-off-by: Guenter Roeck --- drivers/hwmon/occ/common.c | 5 +++-- drivers/hwmon/occ/common.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c index f1ac153d0b568..967532afb1c01 100644 --- a/drivers/hwmon/occ/common.c +++ b/drivers/hwmon/occ/common.c @@ -217,9 +217,9 @@ int occ_update_response(struct occ *occ) return rc; /* limit the maximum rate of polling the OCC */ - if (time_after(jiffies, occ->last_update + OCC_UPDATE_FREQUENCY)) { + if (time_after(jiffies, occ->next_update)) { rc = occ_poll(occ); - occ->last_update = jiffies; + occ->next_update = jiffies + OCC_UPDATE_FREQUENCY; } else { rc = occ->last_error; } @@ -1165,6 +1165,7 @@ int occ_setup(struct occ *occ, const char *name) return rc; } + occ->next_update = jiffies + OCC_UPDATE_FREQUENCY; occ_parse_poll_response(occ); rc = occ_setup_sensor_attrs(occ); diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h index 67e6968b8978e..e6df719770e81 100644 --- a/drivers/hwmon/occ/common.h +++ b/drivers/hwmon/occ/common.h @@ -99,7 +99,7 @@ struct occ { u8 poll_cmd_data; /* to perform OCC poll command */ int (*send_cmd)(struct occ *occ, u8 *cmd); - unsigned long last_update; + unsigned long next_update; struct mutex lock; /* lock OCC access */ struct device *hwmon; -- GitLab From 2d101db3e5be3bbee6001d4227705cec70ecb82e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?V=C3=A1clav=20Kubern=C3=A1t?= Date: Thu, 29 Apr 2021 09:53:38 +0200 Subject: [PATCH 0263/3804] hwmon: (pmbus/fsp-3y) Fix FSP-3Y YH-5151E non-compliant vout encoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I didn't properly test the driver for YH-5151E, so it was completely broken. Firstly, the log/real mapping was incorrect in one case. Secondly, PMBus specifies that output voltages should be in the linear16 encoding. However, the YH-5151E is non-compliant and uses linear11. YM-2151E isn't affected by this. Fix this by converting the values inside the read functions. linear16 gets the exponent from the VOUT_MODE command. The device doesn't support it, so I have to manually supply the value for it. Both supported devices have now been tested to report correct vout values. Fixes: 1734b4135a62 ("hwmon: Add driver for fsp-3y PSUs and PDUs") Signed-off-by: Václav Kubernát Link: https://lore.kernel.org/r/20210429075337.110502-1-kubernat@cesnet.cz Signed-off-by: Guenter Roeck --- drivers/hwmon/pmbus/fsp-3y.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/pmbus/fsp-3y.c b/drivers/hwmon/pmbus/fsp-3y.c index b177987286ae0..e248424752545 100644 --- a/drivers/hwmon/pmbus/fsp-3y.c +++ b/drivers/hwmon/pmbus/fsp-3y.c @@ -57,7 +57,7 @@ static int page_log_to_page_real(int page_log, enum chips chip) case YH5151E_PAGE_12V_LOG: return YH5151E_PAGE_12V_REAL; case YH5151E_PAGE_5V_LOG: - return YH5151E_PAGE_5V_LOG; + return YH5151E_PAGE_5V_REAL; case YH5151E_PAGE_3V3_LOG: return YH5151E_PAGE_3V3_REAL; } @@ -103,8 +103,18 @@ static int set_page(struct i2c_client *client, int page_log) static int fsp3y_read_byte_data(struct i2c_client *client, int page, int reg) { + const struct pmbus_driver_info *info = pmbus_get_driver_info(client); + struct fsp3y_data *data = to_fsp3y_data(info); int rv; + /* + * YH5151-E outputs vout in linear11. The conversion is done when + * reading. Here, we have to inject pmbus_core with the correct + * exponent (it is -6). + */ + if (data->chip == yh5151e && reg == PMBUS_VOUT_MODE) + return 0x1A; + rv = set_page(client, page); if (rv < 0) return rv; @@ -114,6 +124,8 @@ static int fsp3y_read_byte_data(struct i2c_client *client, int page, int reg) static int fsp3y_read_word_data(struct i2c_client *client, int page, int phase, int reg) { + const struct pmbus_driver_info *info = pmbus_get_driver_info(client); + struct fsp3y_data *data = to_fsp3y_data(info); int rv; /* @@ -144,7 +156,18 @@ static int fsp3y_read_word_data(struct i2c_client *client, int page, int phase, if (rv < 0) return rv; - return i2c_smbus_read_word_data(client, reg); + rv = i2c_smbus_read_word_data(client, reg); + if (rv < 0) + return rv; + + /* + * YH-5151E is non-compliant and outputs output voltages in linear11 + * instead of linear16. + */ + if (data->chip == yh5151e && reg == PMBUS_READ_VOUT) + rv = sign_extend32(rv, 10) & 0xffff; + + return rv; } static struct pmbus_driver_info fsp3y_info[] = { -- GitLab From 1f4642b72be79757f050924a9b9673b6a02034bc Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Mon, 3 May 2021 00:46:11 -0700 Subject: [PATCH 0264/3804] usb: typec: ucsi: Retrieve all the PDOs instead of just the first 4 commit 4dbc6a4ef06d ("usb: typec: ucsi: save power data objects in PD mode") introduced retrieval of the PDOs when connected to a PD-capable source. But only the first 4 PDOs are received since that is the maximum number that can be fetched at a time given the MESSAGE_IN length limitation (16 bytes). However, as per the PD spec a connected source may advertise up to a maximum of 7 PDOs. If such a source is connected it's possible the PPM could have negotiated a power contract with one of the PDOs at index greater than 4, and would be reflected in the request data object's (RDO) object position field. This would result in an out-of-bounds access when the rdo_index() is used to index into the src_pdos array in ucsi_psy_get_voltage_now(). With the help of the UBSAN -fsanitize=array-bounds checker enabled this exact issue is revealed when connecting to a PD source adapter that advertise 5 PDOs and the PPM enters a contract having selected the 5th one. [ 151.545106][ T70] Unexpected kernel BRK exception at EL1 [ 151.545112][ T70] Internal error: BRK handler: f2005512 [#1] PREEMPT SMP ... [ 151.545499][ T70] pc : ucsi_psy_get_prop+0x208/0x20c [ 151.545507][ T70] lr : power_supply_show_property+0xc0/0x328 ... [ 151.545542][ T70] Call trace: [ 151.545544][ T70] ucsi_psy_get_prop+0x208/0x20c [ 151.545546][ T70] power_supply_uevent+0x1a4/0x2f0 [ 151.545550][ T70] dev_uevent+0x200/0x384 [ 151.545555][ T70] kobject_uevent_env+0x1d4/0x7e8 [ 151.545557][ T70] power_supply_changed_work+0x174/0x31c [ 151.545562][ T70] process_one_work+0x244/0x6f0 [ 151.545564][ T70] worker_thread+0x3e0/0xa64 We can resolve this by instead retrieving and storing up to the maximum of 7 PDOs in the con->src_pdos array. This would involve two calls to the GET_PDOS command. Fixes: 992a60ed0d5e ("usb: typec: ucsi: register with power_supply class") Fixes: 4dbc6a4ef06d ("usb: typec: ucsi: save power data objects in PD mode") Cc: stable@vger.kernel.org Reported-and-tested-by: Subbaraman Narayanamurthy Reviewed-by: Heikki Krogerus Signed-off-by: Jack Pham Link: https://lore.kernel.org/r/20210503074611.30973-1-jackp@codeaurora.org Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/ucsi/ucsi.c | 41 +++++++++++++++++++++++++++-------- drivers/usb/typec/ucsi/ucsi.h | 6 +++-- 2 files changed, 36 insertions(+), 11 deletions(-) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index 0e1cec346e0f8..1d8b7df59ff49 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -495,7 +495,8 @@ static void ucsi_unregister_altmodes(struct ucsi_connector *con, u8 recipient) } } -static void ucsi_get_pdos(struct ucsi_connector *con, int is_partner) +static int ucsi_get_pdos(struct ucsi_connector *con, int is_partner, + u32 *pdos, int offset, int num_pdos) { struct ucsi *ucsi = con->ucsi; u64 command; @@ -503,17 +504,39 @@ static void ucsi_get_pdos(struct ucsi_connector *con, int is_partner) command = UCSI_COMMAND(UCSI_GET_PDOS) | UCSI_CONNECTOR_NUMBER(con->num); command |= UCSI_GET_PDOS_PARTNER_PDO(is_partner); - command |= UCSI_GET_PDOS_NUM_PDOS(UCSI_MAX_PDOS - 1); + command |= UCSI_GET_PDOS_PDO_OFFSET(offset); + command |= UCSI_GET_PDOS_NUM_PDOS(num_pdos - 1); command |= UCSI_GET_PDOS_SRC_PDOS; - ret = ucsi_send_command(ucsi, command, con->src_pdos, - sizeof(con->src_pdos)); - if (ret < 0) { + ret = ucsi_send_command(ucsi, command, pdos + offset, + num_pdos * sizeof(u32)); + if (ret < 0) dev_err(ucsi->dev, "UCSI_GET_PDOS failed (%d)\n", ret); + if (ret == 0 && offset == 0) + dev_warn(ucsi->dev, "UCSI_GET_PDOS returned 0 bytes\n"); + + return ret; +} + +static void ucsi_get_src_pdos(struct ucsi_connector *con, int is_partner) +{ + int ret; + + /* UCSI max payload means only getting at most 4 PDOs at a time */ + ret = ucsi_get_pdos(con, 1, con->src_pdos, 0, UCSI_MAX_PDOS); + if (ret < 0) return; - } + con->num_pdos = ret / sizeof(u32); /* number of bytes to 32-bit PDOs */ - if (ret == 0) - dev_warn(ucsi->dev, "UCSI_GET_PDOS returned 0 bytes\n"); + if (con->num_pdos < UCSI_MAX_PDOS) + return; + + /* get the remaining PDOs, if any */ + ret = ucsi_get_pdos(con, 1, con->src_pdos, UCSI_MAX_PDOS, + PDO_MAX_OBJECTS - UCSI_MAX_PDOS); + if (ret < 0) + return; + + con->num_pdos += ret / sizeof(u32); } static void ucsi_pwr_opmode_change(struct ucsi_connector *con) @@ -522,7 +545,7 @@ static void ucsi_pwr_opmode_change(struct ucsi_connector *con) case UCSI_CONSTAT_PWR_OPMODE_PD: con->rdo = con->status.request_data_obj; typec_set_pwr_opmode(con->port, TYPEC_PWR_MODE_PD); - ucsi_get_pdos(con, 1); + ucsi_get_src_pdos(con, 1); break; case UCSI_CONSTAT_PWR_OPMODE_TYPEC1_5: con->rdo = 0; diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h index 3920e20a9e9ef..cee666790907e 100644 --- a/drivers/usb/typec/ucsi/ucsi.h +++ b/drivers/usb/typec/ucsi/ucsi.h @@ -8,6 +8,7 @@ #include #include #include +#include #include /* -------------------------------------------------------------------------- */ @@ -134,7 +135,9 @@ void ucsi_connector_change(struct ucsi *ucsi, u8 num); /* GET_PDOS command bits */ #define UCSI_GET_PDOS_PARTNER_PDO(_r_) ((u64)(_r_) << 23) +#define UCSI_GET_PDOS_PDO_OFFSET(_r_) ((u64)(_r_) << 24) #define UCSI_GET_PDOS_NUM_PDOS(_r_) ((u64)(_r_) << 32) +#define UCSI_MAX_PDOS (4) #define UCSI_GET_PDOS_SRC_PDOS ((u64)1 << 34) /* -------------------------------------------------------------------------- */ @@ -302,7 +305,6 @@ struct ucsi { #define UCSI_MAX_SVID 5 #define UCSI_MAX_ALTMODES (UCSI_MAX_SVID * 6) -#define UCSI_MAX_PDOS (4) #define UCSI_TYPEC_VSAFE5V 5000 #define UCSI_TYPEC_1_5_CURRENT 1500 @@ -330,7 +332,7 @@ struct ucsi_connector { struct power_supply *psy; struct power_supply_desc psy_desc; u32 rdo; - u32 src_pdos[UCSI_MAX_PDOS]; + u32 src_pdos[PDO_MAX_OBJECTS]; int num_pdos; struct usb_role_switch *usb_role_sw; -- GitLab From c34e85fa69b9f4568f19da3af06c3870dd8fcc50 Mon Sep 17 00:00:00 2001 From: Kyle Tso Date: Fri, 7 May 2021 14:22:59 +0800 Subject: [PATCH 0265/3804] usb: typec: tcpm: Send DISCOVER_IDENTITY from dedicated work In current design, DISCOVER_IDENTITY is queued to VDM state machine immediately in Ready states and never retries if it fails in the AMS. Move the process to a delayed work so that when it fails for some reasons (e.g. Sink Tx No Go), it can be retried by queueing the work again. Also fix a problem that the vdm_state is not set to a proper state if it is blocked by Collision Avoidance mechanism. Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Kyle Tso Link: https://lore.kernel.org/r/20210507062300.1945009-2-kyletso@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 85 ++++++++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 10 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 68e04e397e924..ae1e84252d38f 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -259,6 +259,7 @@ enum frs_typec_current { #define ALTMODE_DISCOVERY_MAX (SVID_DISCOVERY_MAX * MODE_DISCOVERY_MAX) #define GET_SINK_CAP_RETRY_MS 100 +#define SEND_DISCOVER_RETRY_MS 100 struct pd_mode_data { int svid_index; /* current SVID index */ @@ -366,6 +367,8 @@ struct tcpm_port { struct kthread_work vdm_state_machine; struct hrtimer enable_frs_timer; struct kthread_work enable_frs; + struct hrtimer send_discover_timer; + struct kthread_work send_discover_work; bool state_machine_running; bool vdm_sm_running; @@ -1178,6 +1181,16 @@ static void mod_enable_frs_delayed_work(struct tcpm_port *port, unsigned int del } } +static void mod_send_discover_delayed_work(struct tcpm_port *port, unsigned int delay_ms) +{ + if (delay_ms) { + hrtimer_start(&port->send_discover_timer, ms_to_ktime(delay_ms), HRTIMER_MODE_REL); + } else { + hrtimer_cancel(&port->send_discover_timer); + kthread_queue_work(port->wq, &port->send_discover_work); + } +} + static void tcpm_set_state(struct tcpm_port *port, enum tcpm_state state, unsigned int delay_ms) { @@ -1855,6 +1868,9 @@ static void vdm_run_state_machine(struct tcpm_port *port) res = tcpm_ams_start(port, DISCOVER_IDENTITY); if (res == 0) port->send_discover = false; + else if (res == -EAGAIN) + mod_send_discover_delayed_work(port, + SEND_DISCOVER_RETRY_MS); break; case CMD_DISCOVER_SVID: res = tcpm_ams_start(port, DISCOVER_SVIDS); @@ -1880,6 +1896,7 @@ static void vdm_run_state_machine(struct tcpm_port *port) } if (res < 0) { + port->vdm_state = VDM_STATE_ERR_BUSY; port->vdm_sm_running = false; return; } @@ -3682,14 +3699,6 @@ static inline enum tcpm_state unattached_state(struct tcpm_port *port) return SNK_UNATTACHED; } -static void tcpm_check_send_discover(struct tcpm_port *port) -{ - if ((port->data_role == TYPEC_HOST || port->negotiated_rev > PD_REV20) && - port->send_discover && port->pd_capable) - tcpm_send_vdm(port, USB_SID_PD, CMD_DISCOVER_IDENT, NULL, 0); - port->send_discover = false; -} - static void tcpm_swap_complete(struct tcpm_port *port, int result) { if (port->swap_pending) { @@ -3926,7 +3935,18 @@ static void run_state_machine(struct tcpm_port *port) break; } - tcpm_check_send_discover(port); + /* + * 6.4.4.3.1 Discover Identity + * "The Discover Identity Command Shall only be sent to SOP when there is an + * Explicit Contract." + * For now, this driver only supports SOP for DISCOVER_IDENTITY, thus using + * port->explicit_contract to decide whether to send the command. + */ + if (port->explicit_contract) + mod_send_discover_delayed_work(port, 0); + else + port->send_discover = false; + /* * 6.3.5 * Sending ping messages is not necessary if @@ -4194,7 +4214,18 @@ static void run_state_machine(struct tcpm_port *port) break; } - tcpm_check_send_discover(port); + /* + * 6.4.4.3.1 Discover Identity + * "The Discover Identity Command Shall only be sent to SOP when there is an + * Explicit Contract." + * For now, this driver only supports SOP for DISCOVER_IDENTITY, thus using + * port->explicit_contract. + */ + if (port->explicit_contract) + mod_send_discover_delayed_work(port, 0); + else + port->send_discover = false; + power_supply_changed(port->psy); break; @@ -5288,6 +5319,29 @@ unlock: mutex_unlock(&port->lock); } +static void tcpm_send_discover_work(struct kthread_work *work) +{ + struct tcpm_port *port = container_of(work, struct tcpm_port, send_discover_work); + + mutex_lock(&port->lock); + /* No need to send DISCOVER_IDENTITY anymore */ + if (!port->send_discover) + goto unlock; + + /* Retry if the port is not idle */ + if ((port->state != SRC_READY && port->state != SNK_READY) || port->vdm_sm_running) { + mod_send_discover_delayed_work(port, SEND_DISCOVER_RETRY_MS); + goto unlock; + } + + /* Only send the Message if the port is host for PD rev2.0 */ + if (port->data_role == TYPEC_HOST || port->negotiated_rev > PD_REV20) + tcpm_send_vdm(port, USB_SID_PD, CMD_DISCOVER_IDENT, NULL, 0); + +unlock: + mutex_unlock(&port->lock); +} + static int tcpm_dr_set(struct typec_port *p, enum typec_data_role data) { struct tcpm_port *port = typec_get_drvdata(p); @@ -6093,6 +6147,14 @@ static enum hrtimer_restart enable_frs_timer_handler(struct hrtimer *timer) return HRTIMER_NORESTART; } +static enum hrtimer_restart send_discover_timer_handler(struct hrtimer *timer) +{ + struct tcpm_port *port = container_of(timer, struct tcpm_port, send_discover_timer); + + kthread_queue_work(port->wq, &port->send_discover_work); + return HRTIMER_NORESTART; +} + struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) { struct tcpm_port *port; @@ -6123,12 +6185,15 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) kthread_init_work(&port->vdm_state_machine, vdm_state_machine_work); kthread_init_work(&port->event_work, tcpm_pd_event_handler); kthread_init_work(&port->enable_frs, tcpm_enable_frs_work); + kthread_init_work(&port->send_discover_work, tcpm_send_discover_work); hrtimer_init(&port->state_machine_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); port->state_machine_timer.function = state_machine_timer_handler; hrtimer_init(&port->vdm_state_machine_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); port->vdm_state_machine_timer.function = vdm_state_machine_timer_handler; hrtimer_init(&port->enable_frs_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); port->enable_frs_timer.function = enable_frs_timer_handler; + hrtimer_init(&port->send_discover_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + port->send_discover_timer.function = send_discover_timer_handler; spin_lock_init(&port->pd_event_lock); -- GitLab From f1fbd950b59b67bc5c202216c8e1c6ca8c99a3b4 Mon Sep 17 00:00:00 2001 From: Kyle Tso Date: Fri, 7 May 2021 14:23:00 +0800 Subject: [PATCH 0266/3804] usb: typec: tcpm: Fix wrong handling for Not_Supported in VDM AMS Not_Supported Message is acceptable in VDM AMS. Redirect the VDM state machine to VDM_STATE_DONE when receiving Not_Supported and finish the VDM AMS. Also, after the loop in vdm_state_machine_work, add more conditions of VDM states to clear the vdm_sm_running flag because those are all stopping states when leaving the loop. In addition, finish the VDM AMS if the port partner responds BUSY. Fixes: 8dea75e11380 ("usb: typec: tcpm: Protocol Error handling") Fixes: 8d3a0578ad1a ("usb: typec: tcpm: Respond Wait if VDM state machine is running") Reviewed-by: Guenter Roeck Signed-off-by: Kyle Tso Link: https://lore.kernel.org/r/20210507062300.1945009-3-kyletso@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index ae1e84252d38f..db567e6fde924 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -1897,7 +1897,6 @@ static void vdm_run_state_machine(struct tcpm_port *port) if (res < 0) { port->vdm_state = VDM_STATE_ERR_BUSY; - port->vdm_sm_running = false; return; } } @@ -1913,6 +1912,7 @@ static void vdm_run_state_machine(struct tcpm_port *port) port->vdo_data[0] = port->vdo_retry; port->vdo_count = 1; port->vdm_state = VDM_STATE_READY; + tcpm_ams_finish(port); break; case VDM_STATE_BUSY: port->vdm_state = VDM_STATE_ERR_TMOUT; @@ -1978,7 +1978,7 @@ static void vdm_state_machine_work(struct kthread_work *work) port->vdm_state != VDM_STATE_BUSY && port->vdm_state != VDM_STATE_SEND_MESSAGE); - if (port->vdm_state == VDM_STATE_ERR_TMOUT) + if (port->vdm_state < VDM_STATE_READY) port->vdm_sm_running = false; mutex_unlock(&port->lock); @@ -2569,6 +2569,16 @@ static void tcpm_pd_ctrl_request(struct tcpm_port *port, port->sink_cap_done = true; tcpm_set_state(port, ready_state(port), 0); break; + case SRC_READY: + case SNK_READY: + if (port->vdm_state > VDM_STATE_READY) { + port->vdm_state = VDM_STATE_DONE; + if (tcpm_vdm_ams(port)) + tcpm_ams_finish(port); + mod_vdm_delayed_work(port, 0); + break; + } + fallthrough; default: tcpm_pd_handle_state(port, port->pwr_role == TYPEC_SOURCE ? -- GitLab From d9ff1096a840dddea3d5cfa2149ff7da9f499fb2 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 4 May 2021 22:26:29 +0200 Subject: [PATCH 0267/3804] usb: musb: Fix an error message 'ret' is known to be 0 here. Initialize 'ret' with the expected error code before using it. Fixes: 0990366bab3c ("usb: musb: Add support for MediaTek musb controller") Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/69f514dc7134e3c917cad208e73cc650cb9e2bd6.1620159879.git.christophe.jaillet@wanadoo.fr Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/mediatek.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/mediatek.c b/drivers/usb/musb/mediatek.c index eebeadd269461..6b92d037d8fc8 100644 --- a/drivers/usb/musb/mediatek.c +++ b/drivers/usb/musb/mediatek.c @@ -518,8 +518,8 @@ static int mtk_musb_probe(struct platform_device *pdev) glue->xceiv = devm_usb_get_phy(dev, USB_PHY_TYPE_USB2); if (IS_ERR(glue->xceiv)) { - dev_err(dev, "fail to getting usb-phy %d\n", ret); ret = PTR_ERR(glue->xceiv); + dev_err(dev, "fail to getting usb-phy %d\n", ret); goto err_unregister_usb_phy; } -- GitLab From 1c4841ccbd2b185587010d6178aac11953f61d4c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 26 Apr 2021 16:09:19 -0700 Subject: [PATCH 0268/3804] dmaengine: idxd: add engine 'struct device' missing bus type assignment engine 'struct device' setup is missing assigning the bus type. Add it to dsa_bus_type. Fixes: 75b911309060 ("dmaengine: idxd: fix engine conf_dev lifetime") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161947841562.984844.17505646725993659651.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 2a926bef87f2a..ec7305f86bf74 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -242,6 +242,7 @@ static int idxd_setup_engines(struct idxd_device *idxd) engine->idxd = idxd; device_initialize(&engine->conf_dev); engine->conf_dev.parent = &idxd->conf_dev; + engine->conf_dev.bus = &dsa_bus_type; engine->conf_dev.type = &idxd_engine_device_type; rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id); if (rc < 0) { -- GitLab From 077cdb355b3d8ee0f258856962e6dac06e744401 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 26 Apr 2021 16:32:24 -0700 Subject: [PATCH 0269/3804] dmaengine: idxd: add missing dsa driver unregister The idxd_unregister_driver() has never been called for the idxd driver upon removal. Add fix to call unregister driver on module removal. Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver") Signed-off-by: Dave Jiang Link: https://lore.kernel.org/r/161947994449.1053102.13189942817915448216.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Vinod Koul --- drivers/dma/idxd/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index ec7305f86bf74..6201f52f13f5d 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -788,6 +788,7 @@ module_init(idxd_init_module); static void __exit idxd_exit_module(void) { + idxd_unregister_driver(); pci_unregister_driver(&idxd_pci_driver); idxd_cdev_remove(); idxd_unregister_bus_type(); -- GitLab From 28ec344bb8911bb0d4910456b22ba0dd4f662521 Mon Sep 17 00:00:00 2001 From: Saravana Kannan Date: Wed, 5 May 2021 17:44:22 -0700 Subject: [PATCH 0270/3804] usb: typec: tcpm: Don't block probing of consumers of "connector" nodes fw_devlink expects DT device nodes with "compatible" property to have struct devices created for them. Since the connector node might not be populated as a device, mark it as such so that fw_devlink knows not to wait on this fwnode being populated as a struct device. Without this patch, USB functionality can be broken on some boards. Fixes: f7514a663016 ("of: property: fw_devlink: Add support for remote-endpoint") Reported-by: John Stultz Tested-by: John Stultz Signed-off-by: Saravana Kannan Link: https://lore.kernel.org/r/20210506004423.345199-1-saravanak@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/base/core.c | 3 ++- drivers/usb/typec/tcpm/tcpm.c | 9 +++++++++ include/linux/fwnode.h | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 4a8bf8cda52bc..628e33939acae 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -150,7 +150,7 @@ void fwnode_links_purge(struct fwnode_handle *fwnode) fwnode_links_purge_consumers(fwnode); } -static void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) +void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) { struct fwnode_handle *child; @@ -164,6 +164,7 @@ static void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) fwnode_for_each_available_child_node(fwnode, child) fw_devlink_purge_absent_suppliers(child); } +EXPORT_SYMBOL_GPL(fw_devlink_purge_absent_suppliers); #ifdef CONFIG_SRCU static DEFINE_MUTEX(device_links_lock); diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index c4fdc00a3bc8f..bffa342d4e386 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5754,6 +5754,15 @@ static int tcpm_fw_get_caps(struct tcpm_port *port, if (!fwnode) return -EINVAL; + /* + * This fwnode has a "compatible" property, but is never populated as a + * struct device. Instead we simply parse it to read the properties. + * This it breaks fw_devlink=on. To maintain backward compatibility + * with existing DT files, we work around this by deleting any + * fwnode_links to/from this fwnode. + */ + fw_devlink_purge_absent_suppliers(fwnode); + /* USB data support is optional */ ret = fwnode_property_read_string(fwnode, "data-role", &cap_str); if (ret == 0) { diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index ed4e67a7ff1c4..59828516ebaf1 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -187,5 +187,6 @@ extern u32 fw_devlink_get_flags(void); extern bool fw_devlink_is_strict(void); int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup); void fwnode_links_purge(struct fwnode_handle *fwnode); +void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode); #endif -- GitLab From b577750e4157050ed6de5ca9083893027b8ece33 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 3 May 2021 12:06:03 +0200 Subject: [PATCH 0271/3804] MAINTAINERS: Add Matthew Bobrowski as a reviewer Matthew helps with fanotify already for some time and he'd like to do more so let's add him as a reviewer. CC: Matthew Bobrowski Signed-off-by: Jan Kara --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index bd7aff0c120f2..e15e155ff10eb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6938,6 +6938,7 @@ F: net/core/failover.c FANOTIFY M: Jan Kara R: Amir Goldstein +R: Matthew Bobrowski L: linux-fsdevel@vger.kernel.org S: Maintained F: fs/notify/fanotify/ -- GitLab From 8c721cb0f742f9a01f2f1985b274b544f89904f4 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 28 Apr 2021 10:44:19 +0200 Subject: [PATCH 0272/3804] quota: Use 'hlist_for_each_entry' to simplify code Use 'hlist_for_each_entry' instead of hand writing it. This saves a few lines of code. Link: https://lore.kernel.org/r/f82d3e33964dcbd2aac19866735e0a8381c8a735.1619599407.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Jan Kara --- fs/quota/dquot.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 4f13734637660..22d904bde6ab9 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -288,14 +288,12 @@ static inline void remove_dquot_hash(struct dquot *dquot) static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb, struct kqid qid) { - struct hlist_node *node; struct dquot *dquot; - hlist_for_each (node, dquot_hash+hashent) { - dquot = hlist_entry(node, struct dquot, dq_hash); + hlist_for_each_entry(dquot, dquot_hash+hashent, dq_hash) if (dquot->dq_sb == sb && qid_eq(dquot->dq_id, qid)) return dquot; - } + return NULL; } -- GitLab From 8370e5b093080c03cf89f7ebf0bef6984545429e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 13:01:36 +0300 Subject: [PATCH 0273/3804] hwmon: (ltc2992) Put fwnode in error case during ->probe() In each iteration fwnode_for_each_available_child_node() bumps a reference counting of a loop variable followed by dropping in on a next iteration, Since in error case the loop is broken, we have to drop a reference count by ourselves. Do it for port_fwnode in error case during ->probe(). Fixes: b0bd407e94b0 ("hwmon: (ltc2992) Add support") Cc: Alexandru Tachici Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510100136.3303142-1-andy.shevchenko@gmail.com Signed-off-by: Guenter Roeck --- drivers/hwmon/ltc2992.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c index 4382105bf1420..2a4bed0ab226b 100644 --- a/drivers/hwmon/ltc2992.c +++ b/drivers/hwmon/ltc2992.c @@ -900,11 +900,15 @@ static int ltc2992_parse_dt(struct ltc2992_state *st) fwnode_for_each_available_child_node(fwnode, child) { ret = fwnode_property_read_u32(child, "reg", &addr); - if (ret < 0) + if (ret < 0) { + fwnode_handle_put(child); return ret; + } - if (addr > 1) + if (addr > 1) { + fwnode_handle_put(child); return -EINVAL; + } ret = fwnode_property_read_u32(child, "shunt-resistor-micro-ohms", &val); if (!ret) -- GitLab From e84749a78dc82bc545f12ce009e3dbcc2c5a8a91 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 10 May 2021 17:06:59 +0200 Subject: [PATCH 0274/3804] ALSA: usb-audio: Validate MS endpoint descriptors snd_usbmidi_get_ms_info() may access beyond the border when a malformed descriptor is passed. This patch adds the sanity checks of the given MS endpoint descriptors, and skips invalid ones. Reported-by: syzbot+6bb23a5d5548b93c94aa@syzkaller.appspotmail.com Cc: Link: https://lore.kernel.org/r/20210510150659.17710-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/midi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index a10ac75969a8f..649eb8d1ab7dd 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1956,8 +1956,12 @@ static int snd_usbmidi_get_ms_info(struct snd_usb_midi *umidi, ms_ep = find_usb_ms_endpoint_descriptor(hostep); if (!ms_ep) continue; + if (ms_ep->bLength <= sizeof(*ms_ep)) + continue; if (ms_ep->bNumEmbMIDIJack > 0x10) continue; + if (ms_ep->bLength < sizeof(*ms_ep) + ms_ep->bNumEmbMIDIJack) + continue; if (usb_endpoint_dir_out(ep)) { if (endpoints[epidx].out_ep) { if (++epidx >= MIDI_MAX_ENDPOINTS) { -- GitLab From 7ee06ddc4038f936b0d4459d37a7d4d844fb03db Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 7 May 2021 11:38:10 -0400 Subject: [PATCH 0275/3804] dm snapshot: fix a crash when an origin has no snapshots If an origin target has no snapshots, o->split_boundary is set to 0. This causes BUG_ON(sectors <= 0) in block/bio.c:bio_split(). Fix this by initializing chunk_size, and in turn split_boundary, to rounddown_pow_of_two(UINT_MAX) -- the largest power of two that fits into "unsigned" type. Reported-by: Michael Tokarev Tested-by: Michael Tokarev Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- drivers/md/dm-snap.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index a2acb014c13ae..2a51ddd840b41 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -855,12 +855,11 @@ static int dm_add_exception(void *context, chunk_t old, chunk_t new) static uint32_t __minimum_chunk_size(struct origin *o) { struct dm_snapshot *snap; - unsigned chunk_size = 0; + unsigned chunk_size = rounddown_pow_of_two(UINT_MAX); if (o) list_for_each_entry(snap, &o->snapshots, list) - chunk_size = min_not_zero(chunk_size, - snap->store->chunk_size); + chunk_size = min(chunk_size, snap->store->chunk_size); return (uint32_t) chunk_size; } -- GitLab From 63c8af5687f6b1b70e9458cac1ffb25e86db1695 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 10 May 2021 08:48:06 +0900 Subject: [PATCH 0276/3804] block: uapi: fix comment about block device ioctl Fix the comment mentioning ioctl command range used for zoned block devices to reflect the range of commands actually implemented. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20210509234806.3000-1-damien.lemoal@wdc.com Signed-off-by: Jens Axboe --- include/uapi/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index f44eb0a04afdd..4c32e97dcdf00 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -185,7 +185,7 @@ struct fsxattr { #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) /* - * A jump here: 130-131 are reserved for zoned block devices + * A jump here: 130-136 are reserved for zoned block devices * (see uapi/linux/blkzoned.h) */ -- GitLab From 17866bc6b2ae1c3075c9fe7bcbeb8ea50eb4c3fc Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Sat, 8 May 2021 11:00:56 +0800 Subject: [PATCH 0277/3804] dmaengine: fsl-dpaa2-qdma: Fix error return code in two functions Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in the function where it is. Fixes: 7fdf9b05c73b ("dmaengine: fsl-dpaa2-qdma: Add NXP dpaa2 qDMA controller driver for Layerscape SoCs") Reported-by: Hulk Robot Signed-off-by: Zhen Lei Link: https://lore.kernel.org/r/20210508030056.2027-1-thunder.leizhen@huawei.com Signed-off-by: Vinod Koul --- drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c index 4ec909e0b8106..4ae057922ef1f 100644 --- a/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c +++ b/drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c @@ -332,6 +332,7 @@ static int __cold dpaa2_qdma_setup(struct fsl_mc_device *ls_dev) } if (priv->dpdmai_attr.version.major > DPDMAI_VER_MAJOR) { + err = -EINVAL; dev_err(dev, "DPDMAI major version mismatch\n" "Found %u.%u, supported version is %u.%u\n", priv->dpdmai_attr.version.major, @@ -341,6 +342,7 @@ static int __cold dpaa2_qdma_setup(struct fsl_mc_device *ls_dev) } if (priv->dpdmai_attr.version.minor > DPDMAI_VER_MINOR) { + err = -EINVAL; dev_err(dev, "DPDMAI minor version mismatch\n" "Found %u.%u, supported version is %u.%u\n", priv->dpdmai_attr.version.major, @@ -475,6 +477,7 @@ static int __cold dpaa2_qdma_dpio_setup(struct dpaa2_qdma_priv *priv) ppriv->store = dpaa2_io_store_create(DPAA2_QDMA_STORE_SIZE, dev); if (!ppriv->store) { + err = -ENOMEM; dev_err(dev, "dpaa2_io_store_create() failed\n"); goto err_store; } -- GitLab From 4ad5dd2d7876d79507a20f026507d1a93b8fff10 Mon Sep 17 00:00:00 2001 From: Bumyong Lee Date: Fri, 7 May 2021 15:36:47 +0900 Subject: [PATCH 0278/3804] dmaengine: pl330: fix wrong usage of spinlock flags in dma_cyclc flags varible which is the input parameter of pl330_prep_dma_cyclic() should not be used by spinlock_irq[save/restore] function. Signed-off-by: Jongho Park Signed-off-by: Bumyong Lee Signed-off-by: Chanho Park Link: https://lore.kernel.org/r/20210507063647.111209-1-chanho61.park@samsung.com Fixes: f6f2421c0a1c ("dmaengine: pl330: Merge dma_pl330_dmac and pl330_dmac structs") Cc: stable@vger.kernel.org Signed-off-by: Vinod Koul --- drivers/dma/pl330.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index fd8d2bc3be9f5..110de8a600588 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2694,13 +2694,15 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( for (i = 0; i < len / period_len; i++) { desc = pl330_get_desc(pch); if (!desc) { + unsigned long iflags; + dev_err(pch->dmac->ddma.dev, "%s:%d Unable to fetch desc\n", __func__, __LINE__); if (!first) return NULL; - spin_lock_irqsave(&pl330->pool_lock, flags); + spin_lock_irqsave(&pl330->pool_lock, iflags); while (!list_empty(&first->node)) { desc = list_entry(first->node.next, @@ -2710,7 +2712,7 @@ static struct dma_async_tx_descriptor *pl330_prep_dma_cyclic( list_move_tail(&first->node, &pl330->desc_pool); - spin_unlock_irqrestore(&pl330->pool_lock, flags); + spin_unlock_irqrestore(&pl330->pool_lock, iflags); return NULL; } -- GitLab From 538ea65a9fd1194352a41313bff876b74b5d90c5 Mon Sep 17 00:00:00 2001 From: Quanyang Wang Date: Fri, 30 Apr 2021 14:40:41 +0800 Subject: [PATCH 0279/3804] dmaengine: xilinx: dpdma: initialize registers before request_irq In some scenarios (kdump), dpdma hardware irqs has been enabled when calling request_irq in probe function, and then the dpdma irq handler xilinx_dpdma_irq_handler is invoked to access xdev->chan[i]. But at this moment xdev->chan[i] hasn't been initialized. We should ensure the dpdma controller to be in a consistent and clean state before further initialization. So add dpdma_hw_init() to do this. Furthermore, in xilinx_dpdma_disable_irq, disable all interrupts instead of error interrupts. This patch is to fix the kdump kernel crash as below: [ 3.696128] Unable to handle kernel NULL pointer dereference at virtual address 000000000000012c [ 3.696710] xilinx-zynqmp-dpdma fd4c0000.dma-controller: Xilinx DPDMA engine is probed [ 3.704900] Mem abort info: [ 3.704902] ESR = 0x96000005 [ 3.704905] EC = 0x25: DABT (current EL), IL = 32 bits [ 3.704907] SET = 0, FnV = 0 [ 3.704912] EA = 0, S1PTW = 0 [ 3.713800] ahci-ceva fd0c0000.ahci: supply ahci not found, using dummy regulator [ 3.715585] Data abort info: [ 3.715587] ISV = 0, ISS = 0x00000005 [ 3.715589] CM = 0, WnR = 0 [ 3.715592] [000000000000012c] user address but active_mm is swapper [ 3.715596] Internal error: Oops: 96000005 [#1] SMP [ 3.715599] Modules linked in: [ 3.715608] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.10.0-12170-g60894882155f-dirty #77 [ 3.723937] Hardware name: ZynqMP ZCU102 Rev1.0 (DT) [ 3.723942] pstate: 80000085 (Nzcv daIf -PAN -UAO -TCO BTYPE=--) [ 3.723956] pc : xilinx_dpdma_irq_handler+0x418/0x560 [ 3.793049] lr : xilinx_dpdma_irq_handler+0x3d8/0x560 [ 3.798089] sp : ffffffc01186bdf0 [ 3.801388] x29: ffffffc01186bdf0 x28: ffffffc011836f28 [ 3.806692] x27: ffffff8023e0ac80 x26: 0000000000000080 [ 3.811996] x25: 0000000008000408 x24: 0000000000000003 [ 3.817300] x23: ffffffc01186be70 x22: ffffffc011291740 [ 3.822604] x21: 0000000000000000 x20: 0000000008000408 [ 3.827908] x19: 0000000000000000 x18: 0000000000000010 [ 3.833212] x17: 0000000000000000 x16: 0000000000000000 [ 3.838516] x15: 0000000000000000 x14: ffffffc011291740 [ 3.843820] x13: ffffffc02eb4d000 x12: 0000000034d4d91d [ 3.849124] x11: 0000000000000040 x10: ffffffc0112d2d48 [ 3.854428] x9 : ffffffc0112d2d40 x8 : ffffff8021c00268 [ 3.859732] x7 : 0000000000000000 x6 : ffffffc011836000 [ 3.865036] x5 : 0000000000000003 x4 : 0000000000000000 [ 3.870340] x3 : 0000000000000001 x2 : 0000000000000000 [ 3.875644] x1 : 0000000000000000 x0 : 000000000000012c [ 3.880948] Call trace: [ 3.883382] xilinx_dpdma_irq_handler+0x418/0x560 [ 3.888079] __handle_irq_event_percpu+0x5c/0x178 [ 3.892774] handle_irq_event_percpu+0x34/0x98 [ 3.897210] handle_irq_event+0x44/0xb8 [ 3.901030] handle_fasteoi_irq+0xd0/0x190 [ 3.905117] generic_handle_irq+0x30/0x48 [ 3.909111] __handle_domain_irq+0x64/0xc0 [ 3.913192] gic_handle_irq+0x78/0xa0 [ 3.916846] el1_irq+0xc4/0x180 [ 3.919982] cpuidle_enter_state+0x134/0x2f8 [ 3.924243] cpuidle_enter+0x38/0x50 [ 3.927810] call_cpuidle+0x1c/0x40 [ 3.931290] do_idle+0x20c/0x270 [ 3.934502] cpu_startup_entry+0x28/0x58 [ 3.938410] rest_init+0xbc/0xcc [ 3.941631] arch_call_rest_init+0x10/0x1c [ 3.945718] start_kernel+0x51c/0x558 Fixes: 7cbb0c63de3f ("dmaengine: xilinx: dpdma: Add the Xilinx DisplayPort DMA engine driver") Signed-off-by: Quanyang Wang Link: https://lore.kernel.org/r/20210430064041.4058180-1-quanyang.wang@windriver.com Signed-off-by: Vinod Koul --- drivers/dma/xilinx/xilinx_dpdma.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c index 70b29bd079c9f..ff7dfb3fdeb47 100644 --- a/drivers/dma/xilinx/xilinx_dpdma.c +++ b/drivers/dma/xilinx/xilinx_dpdma.c @@ -1459,7 +1459,7 @@ static void xilinx_dpdma_enable_irq(struct xilinx_dpdma_device *xdev) */ static void xilinx_dpdma_disable_irq(struct xilinx_dpdma_device *xdev) { - dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ERR_ALL); + dpdma_write(xdev->reg, XILINX_DPDMA_IDS, XILINX_DPDMA_INTR_ALL); dpdma_write(xdev->reg, XILINX_DPDMA_EIDS, XILINX_DPDMA_EINTR_ALL); } @@ -1596,6 +1596,26 @@ static struct dma_chan *of_dma_xilinx_xlate(struct of_phandle_args *dma_spec, return dma_get_slave_channel(&xdev->chan[chan_id]->vchan.chan); } +static void dpdma_hw_init(struct xilinx_dpdma_device *xdev) +{ + unsigned int i; + void __iomem *reg; + + /* Disable all interrupts */ + xilinx_dpdma_disable_irq(xdev); + + /* Stop all channels */ + for (i = 0; i < ARRAY_SIZE(xdev->chan); i++) { + reg = xdev->reg + XILINX_DPDMA_CH_BASE + + XILINX_DPDMA_CH_OFFSET * i; + dpdma_clr(reg, XILINX_DPDMA_CH_CNTL, XILINX_DPDMA_CH_CNTL_ENABLE); + } + + /* Clear the interrupt status registers */ + dpdma_write(xdev->reg, XILINX_DPDMA_ISR, XILINX_DPDMA_INTR_ALL); + dpdma_write(xdev->reg, XILINX_DPDMA_EISR, XILINX_DPDMA_EINTR_ALL); +} + static int xilinx_dpdma_probe(struct platform_device *pdev) { struct xilinx_dpdma_device *xdev; @@ -1622,6 +1642,8 @@ static int xilinx_dpdma_probe(struct platform_device *pdev) if (IS_ERR(xdev->reg)) return PTR_ERR(xdev->reg); + dpdma_hw_init(xdev); + xdev->irq = platform_get_irq(pdev, 0); if (xdev->irq < 0) { dev_err(xdev->dev, "failed to get platform irq\n"); -- GitLab From 56a8d3fd1f342d10ee7b27e9ac0f4d00b5fbb91c Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 13 Apr 2021 18:18:34 +0200 Subject: [PATCH 0280/3804] mtd: rawnand: cs553x: Fix external use of SW Hamming ECC helper Since the Hamming software ECC engine has been updated to become a proper and independent ECC engine, it is now mandatory to either initialize the engine before using any one of his functions or use one of the bare helpers which only perform the calculations. As there is no actual need for a proper ECC initialization, let's just use the bare helper instead of the rawnand one. Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-2-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/cs553x_nand.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/cs553x_nand.c b/drivers/mtd/nand/raw/cs553x_nand.c index 6edf78c16fc8b..df40927e56788 100644 --- a/drivers/mtd/nand/raw/cs553x_nand.c +++ b/drivers/mtd/nand/raw/cs553x_nand.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -240,6 +241,15 @@ static int cs_calculate_ecc(struct nand_chip *this, const u_char *dat, return 0; } +static int cs553x_ecc_correct(struct nand_chip *chip, + unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc) +{ + return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, + chip->ecc.size, false); +} + static struct cs553x_nand_controller *controllers[4]; static int cs553x_attach_chip(struct nand_chip *chip) @@ -251,7 +261,7 @@ static int cs553x_attach_chip(struct nand_chip *chip) chip->ecc.bytes = 3; chip->ecc.hwctl = cs_enable_hwecc; chip->ecc.calculate = cs_calculate_ecc; - chip->ecc.correct = rawnand_sw_hamming_correct; + chip->ecc.correct = cs553x_ecc_correct; chip->ecc.strength = 1; return 0; -- GitLab From ad9ffdce453934cdc22fac0a0268119bd630260f Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 13 Apr 2021 18:18:35 +0200 Subject: [PATCH 0281/3804] mtd: rawnand: fsmc: Fix external use of SW Hamming ECC helper Since the Hamming software ECC engine has been updated to become a proper and independent ECC engine, it is now mandatory to either initialize the engine before using any one of his functions or use one of the bare helpers which only perform the calculations. As there is no actual need for a proper ECC initialization, let's just use the bare helper instead of the rawnand one. Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-3-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/fsmc_nand.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c index bf695255b43a2..a3e66155ae405 100644 --- a/drivers/mtd/nand/raw/fsmc_nand.c +++ b/drivers/mtd/nand/raw/fsmc_nand.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -432,6 +433,15 @@ static int fsmc_read_hwecc_ecc1(struct nand_chip *chip, const u8 *data, return 0; } +static int fsmc_correct_ecc1(struct nand_chip *chip, + unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc) +{ + return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, + chip->ecc.size, false); +} + /* Count the number of 0's in buff upto a max of max_bits */ static int count_written_bits(u8 *buff, int size, int max_bits) { @@ -917,7 +927,7 @@ static int fsmc_nand_attach_chip(struct nand_chip *nand) case NAND_ECC_ENGINE_TYPE_ON_HOST: dev_info(host->dev, "Using 1-bit HW ECC scheme\n"); nand->ecc.calculate = fsmc_read_hwecc_ecc1; - nand->ecc.correct = rawnand_sw_hamming_correct; + nand->ecc.correct = fsmc_correct_ecc1; nand->ecc.hwctl = fsmc_enable_hwecc; nand->ecc.bytes = 3; nand->ecc.strength = 1; -- GitLab From c4b7d7c480d607e4f52d310d9d16b194868d0917 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 13 Apr 2021 18:18:36 +0200 Subject: [PATCH 0282/3804] mtd: rawnand: lpc32xx_slc: Fix external use of SW Hamming ECC helper Since the Hamming software ECC engine has been updated to become a proper and independent ECC engine, it is now mandatory to either initialize the engine before using any one of his functions or use one of the bare helpers which only perform the calculations. As there is no actual need for a proper ECC initialization, let's just use the bare helper instead of the rawnand one. Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions") Cc: stable@vger.kernel.org Cc: Vladimir Zapolskiy Reported-by: Trevor Woerner Signed-off-by: Miquel Raynal Tested-by: Trevor Woerner Acked-by: Vladimir Zapolskiy Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-4-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/lpc32xx_slc.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/lpc32xx_slc.c b/drivers/mtd/nand/raw/lpc32xx_slc.c index 6b7269cfb7d83..d7dfc6fd85ca7 100644 --- a/drivers/mtd/nand/raw/lpc32xx_slc.c +++ b/drivers/mtd/nand/raw/lpc32xx_slc.c @@ -27,6 +27,7 @@ #include #include #include +#include #define LPC32XX_MODNAME "lpc32xx-nand" @@ -344,6 +345,18 @@ static int lpc32xx_nand_ecc_calculate(struct nand_chip *chip, return 0; } +/* + * Corrects the data + */ +static int lpc32xx_nand_ecc_correct(struct nand_chip *chip, + unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc) +{ + return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, + chip->ecc.size, false); +} + /* * Read a single byte from NAND device */ @@ -802,7 +815,7 @@ static int lpc32xx_nand_attach_chip(struct nand_chip *chip) chip->ecc.write_oob = lpc32xx_nand_write_oob_syndrome; chip->ecc.read_oob = lpc32xx_nand_read_oob_syndrome; chip->ecc.calculate = lpc32xx_nand_ecc_calculate; - chip->ecc.correct = rawnand_sw_hamming_correct; + chip->ecc.correct = lpc32xx_nand_ecc_correct; chip->ecc.hwctl = lpc32xx_nand_ecc_enable; /* -- GitLab From 3e09c0252501829b14b10f14e1982aaab77d0b80 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 13 Apr 2021 18:18:37 +0200 Subject: [PATCH 0283/3804] mtd: rawnand: ndfc: Fix external use of SW Hamming ECC helper Since the Hamming software ECC engine has been updated to become a proper and independent ECC engine, it is now mandatory to either initialize the engine before using any one of his functions or use one of the bare helpers which only perform the calculations. As there is no actual need for a proper ECC initialization, let's just use the bare helper instead of the rawnand one. Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-5-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/ndfc.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/ndfc.c b/drivers/mtd/nand/raw/ndfc.c index 338d6b1a189eb..98d5a94c3a242 100644 --- a/drivers/mtd/nand/raw/ndfc.c +++ b/drivers/mtd/nand/raw/ndfc.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -100,6 +101,15 @@ static int ndfc_calculate_ecc(struct nand_chip *chip, return 0; } +static int ndfc_correct_ecc(struct nand_chip *chip, + unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc) +{ + return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, + chip->ecc.size, false); +} + /* * Speedups for buffer read/write/verify * @@ -145,7 +155,7 @@ static int ndfc_chip_init(struct ndfc_controller *ndfc, chip->controller = &ndfc->ndfc_control; chip->legacy.read_buf = ndfc_read_buf; chip->legacy.write_buf = ndfc_write_buf; - chip->ecc.correct = rawnand_sw_hamming_correct; + chip->ecc.correct = ndfc_correct_ecc; chip->ecc.hwctl = ndfc_enable_hwecc; chip->ecc.calculate = ndfc_calculate_ecc; chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_ON_HOST; -- GitLab From 46fcb57e6b7283533ebf8ba17a6bd30fa88bdc9f Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 13 Apr 2021 18:18:38 +0200 Subject: [PATCH 0284/3804] mtd: rawnand: sharpsl: Fix external use of SW Hamming ECC helper Since the Hamming software ECC engine has been updated to become a proper and independent ECC engine, it is now mandatory to either initialize the engine before using any one of his functions or use one of the bare helpers which only perform the calculations. As there is no actual need for a proper ECC initialization, let's just use the bare helper instead of the rawnand one. Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-6-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/sharpsl.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/sharpsl.c b/drivers/mtd/nand/raw/sharpsl.c index 5612ee628425b..2f1fe464e6637 100644 --- a/drivers/mtd/nand/raw/sharpsl.c +++ b/drivers/mtd/nand/raw/sharpsl.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -96,6 +97,15 @@ static int sharpsl_nand_calculate_ecc(struct nand_chip *chip, return readb(sharpsl->io + ECCCNTR) != 0; } +static int sharpsl_nand_correct_ecc(struct nand_chip *chip, + unsigned char *buf, + unsigned char *read_ecc, + unsigned char *calc_ecc) +{ + return ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, + chip->ecc.size, false); +} + static int sharpsl_attach_chip(struct nand_chip *chip) { if (chip->ecc.engine_type != NAND_ECC_ENGINE_TYPE_ON_HOST) @@ -106,7 +116,7 @@ static int sharpsl_attach_chip(struct nand_chip *chip) chip->ecc.strength = 1; chip->ecc.hwctl = sharpsl_nand_enable_hwecc; chip->ecc.calculate = sharpsl_nand_calculate_ecc; - chip->ecc.correct = rawnand_sw_hamming_correct; + chip->ecc.correct = sharpsl_nand_correct_ecc; return 0; } -- GitLab From 6a4c5ada577467a5f79e06f2c5e69c09983c22fb Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 13 Apr 2021 18:18:39 +0200 Subject: [PATCH 0285/3804] mtd: rawnand: tmio: Fix external use of SW Hamming ECC helper Since the Hamming software ECC engine has been updated to become a proper and independent ECC engine, it is now mandatory to either initialize the engine before using any one of his functions or use one of the bare helpers which only perform the calculations. As there is no actual need for a proper ECC initialization, let's just use the bare helper instead of the rawnand one. Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-7-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/tmio_nand.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/mtd/nand/raw/tmio_nand.c b/drivers/mtd/nand/raw/tmio_nand.c index de8e919d0ebe6..6d93dd31969b2 100644 --- a/drivers/mtd/nand/raw/tmio_nand.c +++ b/drivers/mtd/nand/raw/tmio_nand.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -292,11 +293,12 @@ static int tmio_nand_correct_data(struct nand_chip *chip, unsigned char *buf, int r0, r1; /* assume ecc.size = 512 and ecc.bytes = 6 */ - r0 = rawnand_sw_hamming_correct(chip, buf, read_ecc, calc_ecc); + r0 = ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, + chip->ecc.size, false); if (r0 < 0) return r0; - r1 = rawnand_sw_hamming_correct(chip, buf + 256, read_ecc + 3, - calc_ecc + 3); + r1 = ecc_sw_hamming_correct(buf + 256, read_ecc + 3, calc_ecc + 3, + chip->ecc.size, false); if (r1 < 0) return r1; return r0 + r1; -- GitLab From 3d227a0b0ce319edbff6fd0d8af4d66689e477cc Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 13 Apr 2021 18:18:40 +0200 Subject: [PATCH 0286/3804] mtd: rawnand: txx9ndfmc: Fix external use of SW Hamming ECC helper Since the Hamming software ECC engine has been updated to become a proper and independent ECC engine, it is now mandatory to either initialize the engine before using any one of his functions or use one of the bare helpers which only perform the calculations. As there is no actual need for a proper ECC initialization, let's just use the bare helper instead of the rawnand one. Fixes: 90ccf0a0192f ("mtd: nand: ecc-hamming: Rename the exported functions") Cc: stable@vger.kernel.org Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210413161840.345208-8-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/txx9ndfmc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/mtd/nand/raw/txx9ndfmc.c b/drivers/mtd/nand/raw/txx9ndfmc.c index 1a9449e53bf9d..b8894ac27073c 100644 --- a/drivers/mtd/nand/raw/txx9ndfmc.c +++ b/drivers/mtd/nand/raw/txx9ndfmc.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -193,8 +194,8 @@ static int txx9ndfmc_correct_data(struct nand_chip *chip, unsigned char *buf, int stat; for (eccsize = chip->ecc.size; eccsize > 0; eccsize -= 256) { - stat = rawnand_sw_hamming_correct(chip, buf, read_ecc, - calc_ecc); + stat = ecc_sw_hamming_correct(buf, read_ecc, calc_ecc, + chip->ecc.size, false); if (stat < 0) return stat; corrected += stat; -- GitLab From 562b4e91d3b221f737f84ff78ee7d348c8a6891f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Sat, 8 May 2021 19:32:14 +0200 Subject: [PATCH 0287/3804] mtd: parsers: ofpart: fix parsing subpartitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ofpart was recently patched to not scan random partition nodes as subpartitions. That change unfortunately broke scanning valid subpartitions like: partitions { compatible = "fixed-partitions"; #address-cells = <1>; #size-cells = <1>; partition@0 { compatible = "fixed-partitions"; label = "bootloader"; reg = <0x0 0x100000>; partition@0 { label = "config"; reg = <0x80000 0x80000>; }; }; }; Fix that regression by adding 1 more code path. We actually need 3 conditional blocks to support 3 possible cases. This change also makes code easier to understand & follow. Reported-by: David Bauer Fixes: 2d751203aacf ("mtd: parsers: ofpart: limit parsing of deprecated DT syntax Signed-off-by: Rafał Miłecki Tested-by: Andrew Cameron Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20210508173214.28365-1-zajec5@gmail.com --- drivers/mtd/parsers/ofpart_core.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/mtd/parsers/ofpart_core.c b/drivers/mtd/parsers/ofpart_core.c index 0fd8d2a0db973..192190c42fc84 100644 --- a/drivers/mtd/parsers/ofpart_core.c +++ b/drivers/mtd/parsers/ofpart_core.c @@ -57,20 +57,22 @@ static int parse_fixed_partitions(struct mtd_info *master, if (!mtd_node) return 0; - ofpart_node = of_get_child_by_name(mtd_node, "partitions"); - if (!ofpart_node && !master->parent) { - /* - * We might get here even when ofpart isn't used at all (e.g., - * when using another parser), so don't be louder than - * KERN_DEBUG - */ - pr_debug("%s: 'partitions' subnode not found on %pOF. Trying to parse direct subnodes as partitions.\n", - master->name, mtd_node); + if (!master->parent) { /* Master */ + ofpart_node = of_get_child_by_name(mtd_node, "partitions"); + if (!ofpart_node) { + /* + * We might get here even when ofpart isn't used at all (e.g., + * when using another parser), so don't be louder than + * KERN_DEBUG + */ + pr_debug("%s: 'partitions' subnode not found on %pOF. Trying to parse direct subnodes as partitions.\n", + master->name, mtd_node); + ofpart_node = mtd_node; + dedicated = false; + } + } else { /* Partition */ ofpart_node = mtd_node; - dedicated = false; } - if (!ofpart_node) - return 0; of_id = of_match_node(parse_ofpart_match_table, ofpart_node); if (dedicated && !of_id) { -- GitLab From 5311221304fa60e357aada75efdf2f2da8c30a57 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Thu, 6 May 2021 19:49:39 +0800 Subject: [PATCH 0288/3804] dt-bindings: phy: cadence-torrent: update reference file of docs In commit fd7abc3c5b87 ("phy: cadence-torrent: Use a common header file for Cadence SERDES"), phy-cadence-torrent.h was renamed to phy-cadence.h. Fix it of the Documentation. Signed-off-by: Wan Jiabing Link: https://lore.kernel.org/r/20210506114940.22215-1-wanjiabing@vivo.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml b/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml index 01dcd14e7b2ad..320a232c7208c 100644 --- a/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml +++ b/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml @@ -118,7 +118,7 @@ patternProperties: description: Specifies the Spread Spectrum Clocking mode used. It can be NO_SSC, EXTERNAL_SSC or INTERNAL_SSC. - Refer include/dt-bindings/phy/phy-cadence-torrent.h for the constants to be used. + Refer include/dt-bindings/phy/phy-cadence.h for the constants to be used. $ref: /schemas/types.yaml#/definitions/uint32 enum: [0, 1, 2] default: 0 -- GitLab From 0c8bd174f0fc131bc9dfab35cd8784f59045da87 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 8 May 2021 09:23:09 +0200 Subject: [PATCH 0289/3804] ACPI: scan: Fix a memory leak in an error handling path If 'acpi_device_set_name()' fails, we must free 'acpi_device_bus_id->bus_id' or there is a (potential) memory leak. Fixes: eb50aaf960e3 ("ACPI: scan: Use unique number for instance_no") Signed-off-by: Christophe JAILLET Reviewed-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index a22778e880c22..651a431e2bbf1 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -700,6 +700,7 @@ int acpi_device_add(struct acpi_device *device, result = acpi_device_set_name(device, acpi_device_bus_id); if (result) { + kfree_const(acpi_device_bus_id->bus_id); kfree(acpi_device_bus_id); goto err_unlock; } -- GitLab From a568814a55a0e82bbc7c7b51333d0c38e8fb5520 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 9 May 2021 14:39:21 +0300 Subject: [PATCH 0290/3804] RDMA/siw: Properly check send and receive CQ pointers The check for the NULL of pointer received from container_of() is incorrect by definition as it points to some offset from NULL. Change such check with proper NULL check of SIW QP attributes. Fixes: 303ae1cdfdf7 ("rdma/siw: application interface") Link: https://lore.kernel.org/r/a7535a82925f6f4c1f062abaa294f3ae6e54bdd2.1620560310.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_verbs.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index d2313efb26db8..917c8a919f387 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -300,7 +300,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, struct siw_ucontext *uctx = rdma_udata_to_drv_context(udata, struct siw_ucontext, base_ucontext); - struct siw_cq *scq = NULL, *rcq = NULL; unsigned long flags; int num_sqe, num_rqe, rv = 0; size_t length; @@ -343,10 +342,8 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, rv = -EINVAL; goto err_out; } - scq = to_siw_cq(attrs->send_cq); - rcq = to_siw_cq(attrs->recv_cq); - if (!scq || (!rcq && !attrs->srq)) { + if (!attrs->send_cq || (!attrs->recv_cq && !attrs->srq)) { siw_dbg(base_dev, "send CQ or receive CQ invalid\n"); rv = -EINVAL; goto err_out; @@ -401,8 +398,8 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, } } qp->pd = pd; - qp->scq = scq; - qp->rcq = rcq; + qp->scq = to_siw_cq(attrs->send_cq); + qp->rcq = to_siw_cq(attrs->recv_cq); if (attrs->srq) { /* -- GitLab From a3d83276d98886879b5bf7b30b7c29882754e4df Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sun, 9 May 2021 14:41:38 +0300 Subject: [PATCH 0291/3804] RDMA/siw: Release xarray entry The xarray entry is allocated in siw_qp_add(), but release was missed in case zero-sized SQ was discovered. Fixes: 661f385961f0 ("RDMA/siw: Fix handling of zero-sized Read and Receive Queues.") Link: https://lore.kernel.org/r/f070b59d5a1114d5a4e830346755c2b3f141cde5.1620560472.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky Reviewed-by: Bernard Metzler Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/siw/siw_verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 917c8a919f387..3f175f220a229 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -375,7 +375,7 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, else { /* Zero sized SQ is not supported */ rv = -EINVAL; - goto err_out; + goto err_out_xa; } if (num_rqe) num_rqe = roundup_pow_of_two(num_rqe); -- GitLab From 54d87913f147a983589923c7f651f97de9af5be1 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Mon, 10 May 2021 17:46:00 +0300 Subject: [PATCH 0292/3804] RDMA/core: Prevent divide-by-zero error triggered by the user The user_entry_size is supplied by the user and later used as a denominator to calculate number of entries. The zero supplied by the user will trigger the following divide-by-zero error: divide error: 0000 [#1] SMP KASAN PTI CPU: 4 PID: 497 Comm: c_repro Not tainted 5.13.0-rc1+ #281 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:ib_uverbs_handler_UVERBS_METHOD_QUERY_GID_TABLE+0x1b1/0x510 Code: 87 59 03 00 00 e8 9f ab 1e ff 48 8d bd a8 00 00 00 e8 d3 70 41 ff 44 0f b7 b5 a8 00 00 00 e8 86 ab 1e ff 31 d2 4c 89 f0 31 ff <49> f7 f5 48 89 d6 48 89 54 24 10 48 89 04 24 e8 1b ad 1e ff 48 8b RSP: 0018:ffff88810416f828 EFLAGS: 00010246 RAX: 0000000000000008 RBX: 1ffff1102082df09 RCX: ffffffff82183f3d RDX: 0000000000000000 RSI: ffff888105f2da00 RDI: 0000000000000000 RBP: ffff88810416fa98 R08: 0000000000000001 R09: ffffed102082df5f R10: ffff88810416faf7 R11: ffffed102082df5e R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000008 R15: ffff88810416faf0 FS: 00007f5715efa740(0000) GS:ffff88811a700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000840 CR3: 000000010c2e0001 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? ib_uverbs_handler_UVERBS_METHOD_INFO_HANDLES+0x4b0/0x4b0 ib_uverbs_cmd_verbs+0x1546/0x1940 ib_uverbs_ioctl+0x186/0x240 __x64_sys_ioctl+0x38a/0x1220 do_syscall_64+0x3f/0x80 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 9f85cbe50aa0 ("RDMA/uverbs: Expose the new GID query API to user space") Link: https://lore.kernel.org/r/b971cc70a8b240a8b5eda33c99fa0558a0071be2.1620657876.git.leonro@nvidia.com Reviewed-by: Jason Gunthorpe Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/uverbs_std_types_device.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/infiniband/core/uverbs_std_types_device.c b/drivers/infiniband/core/uverbs_std_types_device.c index 9ec6971056fa8..a03021d94e110 100644 --- a/drivers/infiniband/core/uverbs_std_types_device.c +++ b/drivers/infiniband/core/uverbs_std_types_device.c @@ -331,6 +331,9 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QUERY_GID_TABLE)( if (ret) return ret; + if (!user_entry_size) + return -EINVAL; + max_entries = uverbs_attr_ptr_get_array_size( attrs, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES, user_entry_size); -- GitLab From c745253e2a691a40c66790defe85c104a887e14a Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Wed, 5 May 2021 14:09:15 +0300 Subject: [PATCH 0293/3804] PM: runtime: Fix unpaired parent child_count for force_resume As pm_runtime_need_not_resume() relies also on usage_count, it can return a different value in pm_runtime_force_suspend() compared to when called in pm_runtime_force_resume(). Different return values can happen if anything calls PM runtime functions in between, and causes the parent child_count to increase on every resume. So far I've seen the issue only for omapdrm that does complicated things with PM runtime calls during system suspend for legacy reasons: omap_atomic_commit_tail() for omapdrm.0 dispc_runtime_get() wakes up 58000000.dss as it's the dispc parent dispc_runtime_resume() rpm_resume() increases parent child_count dispc_runtime_put() won't idle, PM runtime suspend blocked pm_runtime_force_suspend() for 58000000.dss, !pm_runtime_need_not_resume() __update_runtime_status() system suspended pm_runtime_force_resume() for 58000000.dss, pm_runtime_need_not_resume() pm_runtime_enable() only called because of pm_runtime_need_not_resume() omap_atomic_commit_tail() for omapdrm.0 dispc_runtime_get() wakes up 58000000.dss as it's the dispc parent dispc_runtime_resume() rpm_resume() increases parent child_count dispc_runtime_put() won't idle, PM runtime suspend blocked ... rpm_suspend for 58000000.dss but parent child_count is now unbalanced Let's fix the issue by adding a flag for needs_force_resume and use it in pm_runtime_force_resume() instead of pm_runtime_need_not_resume(). Additionally omapdrm system suspend could be simplified later on to avoid lots of unnecessary PM runtime calls and the complexity it adds. The driver can just use internal functions that are shared between the PM runtime and system suspend related functions. Fixes: 4918e1f87c5f ("PM / runtime: Rework pm_runtime_force_suspend/resume()") Signed-off-by: Tony Lindgren Reviewed-by: Ulf Hansson Tested-by: Tomi Valkeinen Cc: 4.16+ # 4.16+ Signed-off-by: Rafael J. Wysocki --- drivers/base/power/runtime.c | 10 +++++++--- include/linux/pm.h | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 1fc1a992f90ca..b570848d23e0e 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1637,6 +1637,7 @@ void pm_runtime_init(struct device *dev) dev->power.request_pending = false; dev->power.request = RPM_REQ_NONE; dev->power.deferred_resume = false; + dev->power.needs_force_resume = 0; INIT_WORK(&dev->power.work, pm_runtime_work); dev->power.timer_expires = 0; @@ -1804,10 +1805,12 @@ int pm_runtime_force_suspend(struct device *dev) * its parent, but set its status to RPM_SUSPENDED anyway in case this * function will be called again for it in the meantime. */ - if (pm_runtime_need_not_resume(dev)) + if (pm_runtime_need_not_resume(dev)) { pm_runtime_set_suspended(dev); - else + } else { __update_runtime_status(dev, RPM_SUSPENDED); + dev->power.needs_force_resume = 1; + } return 0; @@ -1834,7 +1837,7 @@ int pm_runtime_force_resume(struct device *dev) int (*callback)(struct device *); int ret = 0; - if (!pm_runtime_status_suspended(dev) || pm_runtime_need_not_resume(dev)) + if (!pm_runtime_status_suspended(dev) || !dev->power.needs_force_resume) goto out; /* @@ -1853,6 +1856,7 @@ int pm_runtime_force_resume(struct device *dev) pm_runtime_mark_last_busy(dev); out: + dev->power.needs_force_resume = 0; pm_runtime_enable(dev); return ret; } diff --git a/include/linux/pm.h b/include/linux/pm.h index c9657408fee1a..1d8209c09686c 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -601,6 +601,7 @@ struct dev_pm_info { unsigned int idle_notification:1; unsigned int request_pending:1; unsigned int deferred_resume:1; + unsigned int needs_force_resume:1; unsigned int runtime_auto:1; bool ignore_children:1; unsigned int no_callbacks:1; -- GitLab From 37a8024d265564eba680575df6421f19db21dfce Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 7 May 2021 11:59:05 -0700 Subject: [PATCH 0294/3804] arm64: mte: initialize RGSR_EL1.SEED in __cpu_setup A valid implementation choice for the ChooseRandomNonExcludedTag() pseudocode function used by IRG is to behave in the same way as with GCR_EL1.RRND=0. This would mean that RGSR_EL1.SEED is used as an LFSR which must have a non-zero value in order for IRG to properly produce pseudorandom numbers. However, RGSR_EL1 is reset to an UNKNOWN value on soft reset and thus may reset to 0. Therefore we must initialize RGSR_EL1.SEED to a non-zero value in order to ensure that IRG behaves as expected. Signed-off-by: Peter Collingbourne Fixes: 3b714d24ef17 ("arm64: mte: CPU feature detection and initial sysreg configuration") Cc: # 5.10 Link: https://linux-review.googlesource.com/id/I2b089b6c7d6f17ee37e2f0db7df5ad5bcc04526c Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20210507185905.1745402-1-pcc@google.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/proc.S | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 0a48191534ff6..97d7bcd8d4f26 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -447,6 +447,18 @@ SYM_FUNC_START(__cpu_setup) mov x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK) msr_s SYS_GCR_EL1, x10 + /* + * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then + * RGSR_EL1.SEED must be non-zero for IRG to produce + * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we + * must initialize it. + */ + mrs x10, CNTVCT_EL0 + ands x10, x10, #SYS_RGSR_EL1_SEED_MASK + csinc x10, x10, xzr, ne + lsl x10, x10, #SYS_RGSR_EL1_SEED_SHIFT + msr_s SYS_RGSR_EL1, x10 + /* clear any pending tag check faults in TFSR*_EL1 */ msr_s SYS_TFSR_EL1, xzr msr_s SYS_TFSRE0_EL1, xzr -- GitLab From f79f7a2d96769d2a3e663a3e673066be77c30cc3 Mon Sep 17 00:00:00 2001 From: Bhaskar Chowdhury Date: Mon, 22 Mar 2021 17:58:19 +0530 Subject: [PATCH 0295/3804] arc: Fix typos/spellos s/commiting/committing/ s/defintion/definition/ s/gaurantees/guarantees/ s/interrpted/interrupted/ s/interrutps/interrupts/ s/succeded/succeeded/ s/unconditonally/unconditionally/ Reviewed-by: Christian Brauner Acked-by: Randy Dunlap Signed-off-by: Bhaskar Chowdhury Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 2 +- arch/arc/include/asm/cmpxchg.h | 4 ++-- arch/arc/kernel/process.c | 8 ++++---- arch/arc/kernel/signal.c | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arc/Makefile b/arch/arc/Makefile index 4392c9c189c4d..e47adc97a89bf 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -31,7 +31,7 @@ endif ifdef CONFIG_ARC_CURR_IN_REG -# For a global register defintion, make sure it gets passed to every file +# For a global register definition, make sure it gets passed to every file # We had a customer reported bug where some code built in kernel was NOT using # any kernel headers, and missing the r25 global register # Can't do unconditionally because of recursive include issues diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h index 9b87e162e539b..dfeffa25499bf 100644 --- a/arch/arc/include/asm/cmpxchg.h +++ b/arch/arc/include/asm/cmpxchg.h @@ -116,7 +116,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, * * Technically the lock is also needed for UP (boils down to irq save/restore) * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to - * be disabled thus can't possibly be interrpted/preempted/clobbered by xchg() + * be disabled thus can't possibly be interrupted/preempted/clobbered by xchg() * Other way around, xchg is one instruction anyways, so can't be interrupted * as such */ @@ -143,7 +143,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr, /* * "atomic" variant of xchg() * REQ: It needs to follow the same serialization rules as other atomic_xxx() - * Since xchg() doesn't always do that, it would seem that following defintion + * Since xchg() doesn't always do that, it would seem that following definition * is incorrect. But here's the rationale: * SMP : Even xchg() takes the atomic_ops_lock, so OK. * LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index d838d0d576964..3793876f42d9b 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -50,14 +50,14 @@ SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new) int ret; /* - * This is only for old cores lacking LLOCK/SCOND, which by defintion + * This is only for old cores lacking LLOCK/SCOND, which by definition * can't possibly be SMP. Thus doesn't need to be SMP safe. * And this also helps reduce the overhead for serializing in * the UP case */ WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP)); - /* Z indicates to userspace if operation succeded */ + /* Z indicates to userspace if operation succeeded */ regs->status32 &= ~STATUS_Z_MASK; ret = access_ok(uaddr, sizeof(*uaddr)); @@ -107,7 +107,7 @@ fail: void arch_cpu_idle(void) { - /* Re-enable interrupts <= default irq priority before commiting SLEEP */ + /* Re-enable interrupts <= default irq priority before committing SLEEP */ const unsigned int arg = 0x10 | ARCV2_IRQ_DEF_PRIO; __asm__ __volatile__( @@ -120,7 +120,7 @@ void arch_cpu_idle(void) void arch_cpu_idle(void) { - /* sleep, but enable both set E1/E2 (levels of interrutps) before committing */ + /* sleep, but enable both set E1/E2 (levels of interrupts) before committing */ __asm__ __volatile__("sleep 0x3 \n"); } diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index fdbe06c98895e..b3ccb9e5ffe42 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -259,7 +259,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) regs->r2 = (unsigned long)&sf->uc; /* - * small optim to avoid unconditonally calling do_sigaltstack + * small optim to avoid unconditionally calling do_sigaltstack * in sigreturn path, now that we only have rt_sigreturn */ magic = MAGIC_SIGALTSTK; @@ -391,7 +391,7 @@ void do_signal(struct pt_regs *regs) void do_notify_resume(struct pt_regs *regs) { /* - * ASM glue gaurantees that this is only called when returning to + * ASM glue guarantees that this is only called when returning to * user mode */ if (test_thread_flag(TIF_NOTIFY_RESUME)) -- GitLab From 8e97bf39fa0361af3e64739b3766992b9dafa11d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 21 Apr 2021 22:16:53 -0700 Subject: [PATCH 0296/3804] ARC: kgdb: add 'fallthrough' to prevent a warning Use the 'fallthrough' macro to document that this switch case does indeed fall through to the next case. ../arch/arc/kernel/kgdb.c: In function 'kgdb_arch_handle_exception': ../arch/arc/kernel/kgdb.c:141:6: warning: this statement may fall through [-Wimplicit-fallthrough=] 141 | if (kgdb_hex2long(&ptr, &addr)) | ^ ../arch/arc/kernel/kgdb.c:144:2: note: here 144 | case 'D': | ^~~~ Cc: linux-snps-arc@lists.infradead.org Signed-off-by: Randy Dunlap Signed-off-by: Vineet Gupta --- arch/arc/kernel/kgdb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c index ecfbc42d3a40f..345a0000554cb 100644 --- a/arch/arc/kernel/kgdb.c +++ b/arch/arc/kernel/kgdb.c @@ -140,6 +140,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, ptr = &remcomInBuffer[1]; if (kgdb_hex2long(&ptr, &addr)) regs->ret = addr; + fallthrough; case 'D': case 'k': -- GitLab From 3433adc8bd09fc9f29b8baddf33b4ecd1ecd2cdc Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 23 Apr 2021 12:16:25 -0700 Subject: [PATCH 0297/3804] ARC: entry: fix off-by-one error in syscall number validation We have NR_syscall syscalls from [0 .. NR_syscall-1]. However the check for invalid syscall number is "> NR_syscall" as opposed to >=. This off-by-one error erronesously allows "NR_syscall" to be treated as valid syscall causeing out-of-bounds access into syscall-call table ensuing a crash (holes within syscall table have a invalid-entry handler but this is beyond the array implementing the table). This problem showed up on v5.6 kernel when testing glibc 2.33 (v5.10 kernel capable, includng faccessat2 syscall 439). The v5.6 kernel has NR_syscalls=439 (0 to 438). Due to the bug, 439 passed by glibc was not handled as -ENOSYS but processed leading to a crash. Link: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/48 Reported-by: Shahab Vahedi Cc: Signed-off-by: Vineet Gupta --- arch/arc/kernel/entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S index 1743506081da6..2cb8dfe866b66 100644 --- a/arch/arc/kernel/entry.S +++ b/arch/arc/kernel/entry.S @@ -177,7 +177,7 @@ tracesys: ; Do the Sys Call as we normally would. ; Validate the Sys Call number - cmp r8, NR_syscalls + cmp r8, NR_syscalls - 1 mov.hi r0, -ENOSYS bhi tracesys_exit @@ -255,7 +255,7 @@ ENTRY(EV_Trap) ;============ Normal syscall case ; syscall num shd not exceed the total system calls avail - cmp r8, NR_syscalls + cmp r8, NR_syscalls - 1 mov.hi r0, -ENOSYS bhi .Lret_from_system_call -- GitLab From c5f756d8c6265ebb1736a7787231f010a3b782e5 Mon Sep 17 00:00:00 2001 From: Vladimir Isaev Date: Tue, 27 Apr 2021 15:12:37 +0300 Subject: [PATCH 0298/3804] ARC: mm: PAE: use 40-bit physical page mask 32-bit PAGE_MASK can not be used as a mask for physical addresses when PAE is enabled. PAGE_MASK_PHYS must be used for physical addresses instead of PAGE_MASK. Without this, init gets SIGSEGV if pte_modify was called: | potentially unexpected fatal signal 11. | Path: /bin/busybox | CPU: 0 PID: 1 Comm: init Not tainted 5.12.0-rc5-00003-g1e43c377a79f-dirty | Insn could not be fetched | @No matching VMA found | ECR: 0x00040000 EFA: 0x00000000 ERET: 0x00000000 | STAT: 0x80080082 [IE U ] BTA: 0x00000000 | SP: 0x5f9ffe44 FP: 0x00000000 BLK: 0xaf3d4 | LPS: 0x000d093e LPE: 0x000d0950 LPC: 0x00000000 | r00: 0x00000002 r01: 0x5f9fff14 r02: 0x5f9fff20 | ... | Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b Signed-off-by: Vladimir Isaev Reported-by: kernel test robot Cc: Vineet Gupta Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta --- arch/arc/include/asm/page.h | 12 ++++++++++++ arch/arc/include/asm/pgtable.h | 12 +++--------- arch/arc/include/uapi/asm/page.h | 1 - arch/arc/mm/ioremap.c | 5 +++-- arch/arc/mm/tlb.c | 2 +- 5 files changed, 19 insertions(+), 13 deletions(-) diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index ad9b7fe4dba36..4a9d33372fe2b 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -7,6 +7,18 @@ #include +#ifdef CONFIG_ARC_HAS_PAE40 + +#define MAX_POSSIBLE_PHYSMEM_BITS 40 +#define PAGE_MASK_PHYS (0xff00000000ull | PAGE_MASK) + +#else /* CONFIG_ARC_HAS_PAE40 */ + +#define MAX_POSSIBLE_PHYSMEM_BITS 32 +#define PAGE_MASK_PHYS PAGE_MASK + +#endif /* CONFIG_ARC_HAS_PAE40 */ + #ifndef __ASSEMBLY__ #define clear_page(paddr) memset((paddr), 0, PAGE_SIZE) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 163641726a2b9..5878846f00cfe 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -107,8 +107,8 @@ #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) /* Set of bits not changed in pte_modify */ -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL) - +#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \ + _PAGE_SPECIAL) /* More Abbrevaited helpers */ #define PAGE_U_NONE __pgprot(___DEF) #define PAGE_U_R __pgprot(___DEF | _PAGE_READ) @@ -132,13 +132,7 @@ #define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) #define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) -#ifdef CONFIG_ARC_HAS_PAE40 -#define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE) -#define MAX_POSSIBLE_PHYSMEM_BITS 40 -#else -#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE) -#define MAX_POSSIBLE_PHYSMEM_BITS 32 -#endif +#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | _PAGE_CACHEABLE) /************************************************************************** * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h index 2a97e2718a219..2a4ad619abfba 100644 --- a/arch/arc/include/uapi/asm/page.h +++ b/arch/arc/include/uapi/asm/page.h @@ -33,5 +33,4 @@ #define PAGE_MASK (~(PAGE_SIZE-1)) - #endif /* _UAPI__ASM_ARC_PAGE_H */ diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c index fac4adc902044..95c649fbc95af 100644 --- a/arch/arc/mm/ioremap.c +++ b/arch/arc/mm/ioremap.c @@ -53,9 +53,10 @@ EXPORT_SYMBOL(ioremap); void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, unsigned long flags) { + unsigned int off; unsigned long vaddr; struct vm_struct *area; - phys_addr_t off, end; + phys_addr_t end; pgprot_t prot = __pgprot(flags); /* Don't allow wraparound, zero size */ @@ -72,7 +73,7 @@ void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, /* Mappings have to be page-aligned */ off = paddr & ~PAGE_MASK; - paddr &= PAGE_MASK; + paddr &= PAGE_MASK_PHYS; size = PAGE_ALIGN(end + 1) - paddr; /* diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 9bb3c24f36770..9c7c682472896 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -576,7 +576,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned, pte_t *ptep) { unsigned long vaddr = vaddr_unaligned & PAGE_MASK; - phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK; + phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS; struct page *page = pfn_to_page(pte_pfn(*ptep)); create_tlb(vma, vaddr, ptep); -- GitLab From 1d5e4640e5df15252398c1b621f6bd432f2d7f17 Mon Sep 17 00:00:00 2001 From: Vladimir Isaev Date: Tue, 27 Apr 2021 15:13:54 +0300 Subject: [PATCH 0299/3804] ARC: mm: Use max_high_pfn as a HIGHMEM zone border Commit 4af22ded0ecf ("arc: fix memory initialization for systems with two memory banks") fixed highmem, but for the PAE case it causes bug messages: | BUG: Bad page state in process swapper pfn:80000 | page:(ptrval) refcount:0 mapcount:1 mapping:00000000 index:0x0 pfn:0x80000 flags: 0x0() | raw: 00000000 00000100 00000122 00000000 00000000 00000000 00000000 00000000 | raw: 00000000 | page dumped because: nonzero mapcount | Modules linked in: | CPU: 0 PID: 0 Comm: swapper Not tainted 5.12.0-rc5-00003-g1e43c377a79f #1 This is because the fix expects highmem to be always less than lowmem and uses min_low_pfn as an upper zone border for highmem. max_high_pfn should be ok for both highmem and highmem+PAE cases. Fixes: 4af22ded0ecf ("arc: fix memory initialization for systems with two memory banks") Signed-off-by: Vladimir Isaev Cc: Mike Rapoport Cc: stable@vger.kernel.org #5.8 onwards Signed-off-by: Vineet Gupta --- arch/arc/mm/init.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c index 33832e36bdb7d..e2ed355438c96 100644 --- a/arch/arc/mm/init.c +++ b/arch/arc/mm/init.c @@ -157,7 +157,16 @@ void __init setup_arch_memory(void) min_high_pfn = PFN_DOWN(high_mem_start); max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz); - max_zone_pfn[ZONE_HIGHMEM] = min_low_pfn; + /* + * max_high_pfn should be ok here for both HIGHMEM and HIGHMEM+PAE. + * For HIGHMEM without PAE max_high_pfn should be less than + * min_low_pfn to guarantee that these two regions don't overlap. + * For PAE case highmem is greater than lowmem, so it is natural + * to use max_high_pfn. + * + * In both cases, holes should be handled by pfn_valid(). + */ + max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn; high_memory = (void *)(min_high_pfn << PAGE_SHIFT); -- GitLab From bf9e262fcfa6350269f00a95658f701f2595db13 Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Sat, 8 May 2021 11:07:33 +0800 Subject: [PATCH 0300/3804] docs/zh_CN: Remove obsolete translation file This translation file was replaced by Documentation/translations/zh_CN/admin-guide/security-bugs.rst which was created in commit 2d153571003b ("docs/zh_CN: Add zh_CN/admin-guide/security-bugs.rst"). This is a translation left over from history. Remove it. Signed-off-by: Wan Jiabing Acked-by: Wu XiangCheng Link: https://lore.kernel.org/r/20210508030741.82655-1-wanjiabing@vivo.com Signed-off-by: Jonathan Corbet --- Documentation/translations/zh_CN/SecurityBugs | 50 ------------------- 1 file changed, 50 deletions(-) delete mode 100644 Documentation/translations/zh_CN/SecurityBugs diff --git a/Documentation/translations/zh_CN/SecurityBugs b/Documentation/translations/zh_CN/SecurityBugs deleted file mode 100644 index 2d0fffd122cee..0000000000000 --- a/Documentation/translations/zh_CN/SecurityBugs +++ /dev/null @@ -1,50 +0,0 @@ -Chinese translated version of Documentation/admin-guide/security-bugs.rst - -If you have any comment or update to the content, please contact the -original document maintainer directly. However, if you have a problem -communicating in English you can also ask the Chinese maintainer for -help. Contact the Chinese maintainer if this translation is outdated -or if there is a problem with the translation. - -Chinese maintainer: Harry Wei ---------------------------------------------------------------------- -Documentation/admin-guide/security-bugs.rst 的中文翻译 - -如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文 -交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻 -译存在问题,请联系中文版维护者。 - -中文版维护者: 贾威威 Harry Wei -中文版翻译者: 贾威威 Harry Wei -中文版校译者: 贾威威 Harry Wei - - -以下为正文 ---------------------------------------------------------------------- -Linux内核开发者认为安全非常重要。因此,我们想要知道当一个有关于 -安全的漏洞被发现的时候,并且它可能会被尽快的修复或者公开。请把这个安全 -漏洞报告给Linux内核安全团队。 - -1) 联系 - -linux内核安全团队可以通过email来联系。这是 -一组独立的安全工作人员,可以帮助改善漏洞报告并且公布和取消一个修复。安 -全团队有可能会从部分的维护者那里引进额外的帮助来了解并且修复安全漏洞。 -当遇到任何漏洞,所能提供的信息越多就越能诊断和修复。如果你不清楚什么 -是有帮助的信息,那就请重温一下admin-guide/reporting-bugs.rst文件中的概述过程。任 -何攻击性的代码都是非常有用的,未经报告者的同意不会被取消,除非它已经 -被公布于众。 - -2) 公开 - -Linux内核安全团队的宗旨就是和漏洞提交者一起处理漏洞的解决方案直 -到公开。我们喜欢尽快地完全公开漏洞。当一个漏洞或者修复还没有被完全地理 -解,解决方案没有通过测试或者供应商协调,可以合理地延迟公开。然而,我们 -期望这些延迟尽可能的短些,是可数的几天,而不是几个星期或者几个月。公开 -日期是通过安全团队和漏洞提供者以及供应商洽谈后的结果。公开时间表是从很 -短(特殊的,它已经被公众所知道)到几个星期。作为一个基本的默认政策,我 -们所期望通知公众的日期是7天的安排。 - -3) 保密协议 - -Linux内核安全团队不是一个正式的团体,因此不能加入任何的保密协议。 -- GitLab From 9e255e2b9afe948fb795cbaa854acc3904d4212c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 6 May 2021 16:19:07 -0700 Subject: [PATCH 0301/3804] Documentation: drop optional BOMs A few of the Documentation .rst files begin with a Unicode byte order mark (BOM). The BOM may signify endianess for 16-bit or 32-bit encodings or indicate that the text stream is indeed Unicode. We don't need it for either of those uses. It may also interfere with (confuse) some software. Since we don't need it and its use is optional, just delete the uses of it in Documentation/. https://en.wikipedia.org/wiki/Byte_order_mark Signed-off-by: Randy Dunlap Reviewed-by: Matthew Wilcox (Oracle) Reviewed-by: Greg Kroah-Hartman Cc: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/20210506231907.14359-1-rdunlap@infradead.org Signed-off-by: Jonathan Corbet --- Documentation/block/data-integrity.rst | 2 +- Documentation/process/kernel-enforcement-statement.rst | 2 +- Documentation/security/tpm/xen-tpmfront.rst | 2 +- Documentation/timers/no_hz.rst | 2 +- Documentation/usb/mtouchusb.rst | 2 +- Documentation/usb/usb-serial.rst | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Documentation/block/data-integrity.rst b/Documentation/block/data-integrity.rst index 4f2452a95c434..07a97aa266685 100644 --- a/Documentation/block/data-integrity.rst +++ b/Documentation/block/data-integrity.rst @@ -1,4 +1,4 @@ -============== +============== Data Integrity ============== diff --git a/Documentation/process/kernel-enforcement-statement.rst b/Documentation/process/kernel-enforcement-statement.rst index e5a1be4760476..dc2d813b2e793 100644 --- a/Documentation/process/kernel-enforcement-statement.rst +++ b/Documentation/process/kernel-enforcement-statement.rst @@ -1,4 +1,4 @@ -.. _process_statement_kernel: +.. _process_statement_kernel: Linux Kernel Enforcement Statement ---------------------------------- diff --git a/Documentation/security/tpm/xen-tpmfront.rst b/Documentation/security/tpm/xen-tpmfront.rst index 00d5b1db227d4..31c67522f2ade 100644 --- a/Documentation/security/tpm/xen-tpmfront.rst +++ b/Documentation/security/tpm/xen-tpmfront.rst @@ -1,4 +1,4 @@ -============================= +============================= Virtual TPM interface for Xen ============================= diff --git a/Documentation/timers/no_hz.rst b/Documentation/timers/no_hz.rst index c4c70e1aada3c..6cadad7c3aad4 100644 --- a/Documentation/timers/no_hz.rst +++ b/Documentation/timers/no_hz.rst @@ -1,4 +1,4 @@ -====================================== +====================================== NO_HZ: Reducing Scheduling-Clock Ticks ====================================== diff --git a/Documentation/usb/mtouchusb.rst b/Documentation/usb/mtouchusb.rst index d1111b74bf759..5ae1f74fe74b6 100644 --- a/Documentation/usb/mtouchusb.rst +++ b/Documentation/usb/mtouchusb.rst @@ -1,4 +1,4 @@ -================ +================ mtouchusb driver ================ diff --git a/Documentation/usb/usb-serial.rst b/Documentation/usb/usb-serial.rst index 8fa7dbd3da9a4..69586aeb60bb4 100644 --- a/Documentation/usb/usb-serial.rst +++ b/Documentation/usb/usb-serial.rst @@ -1,4 +1,4 @@ -========== +========== USB serial ========== -- GitLab From 8ab78863e9eff11910e1ac8bcf478060c29b379e Mon Sep 17 00:00:00 2001 From: Jeimon Date: Sat, 8 May 2021 11:52:30 +0800 Subject: [PATCH 0302/3804] net/nfc/rawsock.c: fix a permission check bug The function rawsock_create() calls a privileged function sk_alloc(), which requires a ns-aware check to check net->user_ns, i.e., ns_capable(). However, the original code checks the init_user_ns using capable(). So we replace the capable() with ns_capable(). Signed-off-by: Jeimon Signed-off-by: David S. Miller --- net/nfc/rawsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 9c7eb8455ba8e..5f1d438a0a23f 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -329,7 +329,7 @@ static int rawsock_create(struct net *net, struct socket *sock, return -ESOCKTNOSUPPORT; if (sock->type == SOCK_RAW) { - if (!capable(CAP_NET_RAW)) + if (!ns_capable(net->user_ns, CAP_NET_RAW)) return -EPERM; sock->ops = &rawsock_raw_ops; } else { -- GitLab From ddb6e00f8413e885ff826e32521cff7924661de0 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 8 May 2021 07:38:22 +0200 Subject: [PATCH 0303/3804] net: netcp: Fix an error message 'ret' is known to be 0 here. The expected error code is stored in 'tx_pipe->dma_queue', so use it instead. While at it, switch from %d to %pe which is more user friendly. Fixes: 84640e27f230 ("net: netcp: Add Keystone NetCP core ethernet driver") Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c index 9030e619e5436..97942b0e38975 100644 --- a/drivers/net/ethernet/ti/netcp_core.c +++ b/drivers/net/ethernet/ti/netcp_core.c @@ -1350,8 +1350,8 @@ int netcp_txpipe_open(struct netcp_tx_pipe *tx_pipe) tx_pipe->dma_queue = knav_queue_open(name, tx_pipe->dma_queue_id, KNAV_QUEUE_SHARED); if (IS_ERR(tx_pipe->dma_queue)) { - dev_err(dev, "Could not open DMA queue for channel \"%s\": %d\n", - name, ret); + dev_err(dev, "Could not open DMA queue for channel \"%s\": %pe\n", + name, tx_pipe->dma_queue); ret = PTR_ERR(tx_pipe->dma_queue); goto err; } -- GitLab From 0d3ae948741ac6d80e39ab27b45297367ee477de Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Apr 2021 10:05:17 -0700 Subject: [PATCH 0304/3804] sh: Remove unused variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removes this annoying warning: arch/sh/kernel/traps.c: In function ‘nmi_trap_handler’: arch/sh/kernel/traps.c:183:15: warning: unused variable ‘cpu’ [-Wunused-variable] 183 | unsigned int cpu = smp_processor_id(); Fixes: fe3f1d5d7cd3 ("sh: Get rid of nmi_count()") Signed-off-by: Eric Dumazet Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210414170517.1205430-1-eric.dumazet@gmail.com --- arch/sh/kernel/traps.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index f5beecdac6938..e76b221570999 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -180,7 +180,6 @@ static inline void arch_ftrace_nmi_exit(void) { } BUILD_TRAP_HANDLER(nmi) { - unsigned int cpu = smp_processor_id(); TRAP_HANDLER_DECL; arch_ftrace_nmi_enter(); -- GitLab From a269333fa5c0c8e53c92b5a28a6076a28cde3e83 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 8 May 2021 16:30:35 +0300 Subject: [PATCH 0305/3804] net: dsa: fix a crash if ->get_sset_count() fails If ds->ops->get_sset_count() fails then it "count" is a negative error code such as -EOPNOTSUPP. Because "i" is an unsigned int, the negative error code is type promoted to a very high value and the loop will corrupt memory until the system crashes. Fix this by checking for error codes and changing the type of "i" to just int. Fixes: badf3ada60ab ("net: dsa: Provide CPU port statistics to master netdev") Signed-off-by: Dan Carpenter Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- net/dsa/master.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/dsa/master.c b/net/dsa/master.c index 052a977914a6d..63adbc21a735a 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -147,8 +147,7 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, struct dsa_switch *ds = cpu_dp->ds; int port = cpu_dp->index; int len = ETH_GSTRING_LEN; - int mcount = 0, count; - unsigned int i; + int mcount = 0, count, i; uint8_t pfx[4]; uint8_t *ndata; @@ -178,6 +177,8 @@ static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, */ ds->ops->get_strings(ds, port, stringset, ndata); count = ds->ops->get_sset_count(ds, port, stringset); + if (count < 0) + return; for (i = 0; i < count; i++) { memmove(ndata + (i * len + sizeof(pfx)), ndata + i * len, len - sizeof(pfx)); -- GitLab From db825feefc6868896fed5e361787ba3bee2fd906 Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Sun, 9 May 2021 09:43:18 +0300 Subject: [PATCH 0306/3804] net/mlx4: Fix EEPROM dump support Fix SFP and QSFP* EEPROM queries by setting i2c_address, offset and page number correctly. For SFP set the following params: - I2C address for offsets 0-255 is 0x50. For 256-511 - 0x51. - Page number is zero. - Offset is 0-255. At the same time, QSFP* parameters are different: - I2C address is always 0x50. - Page number is not limited to zero. - Offset is 0-255 for page zero and 128-255 for others. To set parameters accordingly to cable used, implement function to query module ID and implement respective helper functions to set parameters correctly. Fixes: 135dd9594f12 ("net/mlx4_en: ethtool, Remove unsupported SFP EEPROM high pages query") Signed-off-by: Vladyslav Tarasiuk Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx4/en_ethtool.c | 4 +- drivers/net/ethernet/mellanox/mlx4/port.c | 107 +++++++++++++++++- 2 files changed, 104 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 1434df66fcf2e..3616b77caa0ad 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -2027,8 +2027,6 @@ static int mlx4_en_set_tunable(struct net_device *dev, return ret; } -#define MLX4_EEPROM_PAGE_LEN 256 - static int mlx4_en_get_module_info(struct net_device *dev, struct ethtool_modinfo *modinfo) { @@ -2063,7 +2061,7 @@ static int mlx4_en_get_module_info(struct net_device *dev, break; case MLX4_MODULE_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = MLX4_EEPROM_PAGE_LEN; + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; break; default: return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index ba6ac31a339dc..256a06b3c096b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -1973,6 +1973,7 @@ EXPORT_SYMBOL(mlx4_get_roce_gid_from_slave); #define I2C_ADDR_LOW 0x50 #define I2C_ADDR_HIGH 0x51 #define I2C_PAGE_SIZE 256 +#define I2C_HIGH_PAGE_SIZE 128 /* Module Info Data */ struct mlx4_cable_info { @@ -2026,6 +2027,88 @@ static inline const char *cable_info_mad_err_str(u16 mad_status) return "Unknown Error"; } +static int mlx4_get_module_id(struct mlx4_dev *dev, u8 port, u8 *module_id) +{ + struct mlx4_cmd_mailbox *inbox, *outbox; + struct mlx4_mad_ifc *inmad, *outmad; + struct mlx4_cable_info *cable_info; + int ret; + + inbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(inbox)) + return PTR_ERR(inbox); + + outbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(outbox)) { + mlx4_free_cmd_mailbox(dev, inbox); + return PTR_ERR(outbox); + } + + inmad = (struct mlx4_mad_ifc *)(inbox->buf); + outmad = (struct mlx4_mad_ifc *)(outbox->buf); + + inmad->method = 0x1; /* Get */ + inmad->class_version = 0x1; + inmad->mgmt_class = 0x1; + inmad->base_version = 0x1; + inmad->attr_id = cpu_to_be16(0xFF60); /* Module Info */ + + cable_info = (struct mlx4_cable_info *)inmad->data; + cable_info->dev_mem_address = 0; + cable_info->page_num = 0; + cable_info->i2c_addr = I2C_ADDR_LOW; + cable_info->size = cpu_to_be16(1); + + ret = mlx4_cmd_box(dev, inbox->dma, outbox->dma, port, 3, + MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); + if (ret) + goto out; + + if (be16_to_cpu(outmad->status)) { + /* Mad returned with bad status */ + ret = be16_to_cpu(outmad->status); + mlx4_warn(dev, + "MLX4_CMD_MAD_IFC Get Module ID attr(%x) port(%d) i2c_addr(%x) offset(%d) size(%d): Response Mad Status(%x) - %s\n", + 0xFF60, port, I2C_ADDR_LOW, 0, 1, ret, + cable_info_mad_err_str(ret)); + ret = -ret; + goto out; + } + cable_info = (struct mlx4_cable_info *)outmad->data; + *module_id = cable_info->data[0]; +out: + mlx4_free_cmd_mailbox(dev, inbox); + mlx4_free_cmd_mailbox(dev, outbox); + return ret; +} + +static void mlx4_sfp_eeprom_params_set(u8 *i2c_addr, u8 *page_num, u16 *offset) +{ + *i2c_addr = I2C_ADDR_LOW; + *page_num = 0; + + if (*offset < I2C_PAGE_SIZE) + return; + + *i2c_addr = I2C_ADDR_HIGH; + *offset -= I2C_PAGE_SIZE; +} + +static void mlx4_qsfp_eeprom_params_set(u8 *i2c_addr, u8 *page_num, u16 *offset) +{ + /* Offsets 0-255 belong to page 0. + * Offsets 256-639 belong to pages 01, 02, 03. + * For example, offset 400 is page 02: 1 + (400 - 256) / 128 = 2 + */ + if (*offset < I2C_PAGE_SIZE) + *page_num = 0; + else + *page_num = 1 + (*offset - I2C_PAGE_SIZE) / I2C_HIGH_PAGE_SIZE; + *i2c_addr = I2C_ADDR_LOW; + *offset -= *page_num * I2C_HIGH_PAGE_SIZE; +} + /** * mlx4_get_module_info - Read cable module eeprom data * @dev: mlx4_dev. @@ -2045,12 +2128,30 @@ int mlx4_get_module_info(struct mlx4_dev *dev, u8 port, struct mlx4_cmd_mailbox *inbox, *outbox; struct mlx4_mad_ifc *inmad, *outmad; struct mlx4_cable_info *cable_info; - u16 i2c_addr; + u8 module_id, i2c_addr, page_num; int ret; if (size > MODULE_INFO_MAX_READ) size = MODULE_INFO_MAX_READ; + ret = mlx4_get_module_id(dev, port, &module_id); + if (ret) + return ret; + + switch (module_id) { + case MLX4_MODULE_ID_SFP: + mlx4_sfp_eeprom_params_set(&i2c_addr, &page_num, &offset); + break; + case MLX4_MODULE_ID_QSFP: + case MLX4_MODULE_ID_QSFP_PLUS: + case MLX4_MODULE_ID_QSFP28: + mlx4_qsfp_eeprom_params_set(&i2c_addr, &page_num, &offset); + break; + default: + mlx4_err(dev, "Module ID not recognized: %#x\n", module_id); + return -EINVAL; + } + inbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(inbox)) return PTR_ERR(inbox); @@ -2076,11 +2177,9 @@ int mlx4_get_module_info(struct mlx4_dev *dev, u8 port, */ size -= offset + size - I2C_PAGE_SIZE; - i2c_addr = I2C_ADDR_LOW; - cable_info = (struct mlx4_cable_info *)inmad->data; cable_info->dev_mem_address = cpu_to_be16(offset); - cable_info->page_num = 0; + cable_info->page_num = page_num; cable_info->i2c_addr = i2c_addr; cable_info->size = cpu_to_be16(size); -- GitLab From b94cbc909f1d80378a1f541968309e5c1178c98b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 9 May 2021 22:33:38 +0300 Subject: [PATCH 0307/3804] net: dsa: fix error code getting shifted with 4 in dsa_slave_get_sset_count DSA implements a bunch of 'standardized' ethtool statistics counters, namely tx_packets, tx_bytes, rx_packets, rx_bytes. So whatever the hardware driver returns in .get_sset_count(), we need to add 4 to that. That is ok, except that .get_sset_count() can return a negative error code, for example: b53_get_sset_count -> phy_ethtool_get_sset_count -> return -EIO -EIO is -5, and with 4 added to it, it becomes -1, aka -EPERM. One can imagine that certain error codes may even become positive, although based on code inspection I did not see instances of that. Check the error code first, if it is negative return it as-is. Based on a similar patch for dsa_master_get_strings from Dan Carpenter: https://patchwork.kernel.org/project/netdevbpf/patch/YJaSe3RPgn7gKxZv@mwanda/ Fixes: 91da11f870f0 ("net: Distributed Switch Architecture protocol support") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- net/dsa/slave.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 8c0f3c6ab3654..d4756b9201089 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -776,13 +776,15 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset) struct dsa_switch *ds = dp->ds; if (sset == ETH_SS_STATS) { - int count; + int count = 0; - count = 4; - if (ds->ops->get_sset_count) - count += ds->ops->get_sset_count(ds, dp->index, sset); + if (ds->ops->get_sset_count) { + count = ds->ops->get_sset_count(ds, dp->index, sset); + if (count < 0) + return count; + } - return count; + return count + 4; } else if (sset == ETH_SS_TEST) { return net_selftest_get_count(); } -- GitLab From 3058e01d31bbdbe50e02cafece2b22817a6a0eae Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Mon, 10 May 2021 09:57:38 +0700 Subject: [PATCH 0308/3804] tipc: make node link identity publish thread safe The using of the node address and node link identity are not thread safe, meaning that two publications may be published the same values, as result one of them will get failure because of already existing in the name table. To avoid this we have to use the node address and node link identity values from inside the node item's write lock protection. Fixes: 50a3499ab853 ("tipc: simplify signature of tipc_namtbl_publish()") Acked-by: Jon Maloy Signed-off-by: Hoang Le Signed-off-by: David S. Miller --- net/tipc/node.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 8217905348f48..81af92954c6c2 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -423,18 +423,18 @@ static void tipc_node_write_unlock(struct tipc_node *n) write_unlock_bh(&n->lock); if (flags & TIPC_NOTIFY_NODE_DOWN) - tipc_publ_notify(net, publ_list, n->addr, n->capabilities); + tipc_publ_notify(net, publ_list, sk.node, n->capabilities); if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(net, n->addr, n->capabilities); + tipc_named_node_up(net, sk.node, n->capabilities); if (flags & TIPC_NOTIFY_LINK_UP) { - tipc_mon_peer_up(net, n->addr, bearer_id); - tipc_nametbl_publish(net, &ua, &sk, n->link_id); + tipc_mon_peer_up(net, sk.node, bearer_id); + tipc_nametbl_publish(net, &ua, &sk, sk.ref); } if (flags & TIPC_NOTIFY_LINK_DOWN) { - tipc_mon_peer_down(net, n->addr, bearer_id); - tipc_nametbl_withdraw(net, &ua, &sk, n->link_id); + tipc_mon_peer_down(net, sk.node, bearer_id); + tipc_nametbl_withdraw(net, &ua, &sk, sk.ref); } } -- GitLab From 297c4de6f780b63b6d2af75a730720483bf1904a Mon Sep 17 00:00:00 2001 From: Michael Walle Date: Mon, 10 May 2021 13:07:08 +0200 Subject: [PATCH 0309/3804] net: dsa: felix: re-enable TAS guard band mode Commit 316bcffe4479 ("net: dsa: felix: disable always guard band bit for TAS config") disabled the guard band and broke 802.3Qbv compliance. There are two issues here: (1) Without the guard band the end of the scheduling window could be overrun by a frame in transit. (2) Frames that don't fit into a configured window will still be sent. The reason for both issues is that the switch will schedule the _start_ of a frame transmission inside the predefined window without taking the length of the frame into account. Thus, we'll need the guard band which will close the gate early, so that a complete frame can still be sent. Revert the commit and add a note. For a lengthy discussion see [1]. [1] https://lore.kernel.org/netdev/c7618025da6723418c56a54fe4683bd7@walle.cc/ Fixes: 316bcffe4479 ("net: dsa: felix: disable always guard band bit for TAS config") Signed-off-by: Michael Walle Reviewed-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/ocelot/felix_vsc9959.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 2473bebe48e6e..f966a253d1c77 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1227,12 +1227,17 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, if (taprio->num_entries > VSC9959_TAS_GCL_ENTRY_MAX) return -ERANGE; - /* Set port num and disable ALWAYS_GUARD_BAND_SCH_Q, which means set - * guard band to be implemented for nonschedule queues to schedule - * queues transition. + /* Enable guard band. The switch will schedule frames without taking + * their length into account. Thus we'll always need to enable the + * guard band which reserves the time of a maximum sized frame at the + * end of the time window. + * + * Although the ALWAYS_GUARD_BAND_SCH_Q bit is global for all ports, we + * need to set PORT_NUM, because subsequent writes to PARAM_CFG_REG_n + * operate on the port number. */ - ocelot_rmw(ocelot, - QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(port), + ocelot_rmw(ocelot, QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(port) | + QSYS_TAS_PARAM_CFG_CTRL_ALWAYS_GUARD_BAND_SCH_Q, QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM_M | QSYS_TAS_PARAM_CFG_CTRL_ALWAYS_GUARD_BAND_SCH_Q, QSYS_TAS_PARAM_CFG_CTRL); -- GitLab From a00593737f8bac2c9e97b696e7ff84a4446653e8 Mon Sep 17 00:00:00 2001 From: Subbaraman Narayanamurthy Date: Thu, 22 Apr 2021 11:36:10 -0700 Subject: [PATCH 0310/3804] interconnect: qcom: bcm-voter: add a missing of_node_put() Add a missing of_node_put() in of_bcm_voter_get() to avoid the reference leak. Signed-off-by: Subbaraman Narayanamurthy Reviewed-by: Matthias Kaehlcke Link: https://lore.kernel.org/r/1619116570-13308-1-git-send-email-subbaram@codeaurora.org Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support") Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/bcm-voter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/interconnect/qcom/bcm-voter.c b/drivers/interconnect/qcom/bcm-voter.c index d1591a28b7438..547f4c2593f41 100644 --- a/drivers/interconnect/qcom/bcm-voter.c +++ b/drivers/interconnect/qcom/bcm-voter.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (c) 2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2020-2021, The Linux Foundation. All rights reserved. */ #include @@ -205,6 +205,7 @@ struct bcm_voter *of_bcm_voter_get(struct device *dev, const char *name) } mutex_unlock(&bcm_voter_lock); + of_node_put(node); return voter; } EXPORT_SYMBOL_GPL(of_bcm_voter_get); -- GitLab From 1fd86e280d8b21762901e43d42d66dbfe8b8e0d3 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Tue, 11 May 2021 11:44:33 +0800 Subject: [PATCH 0311/3804] interconnect: qcom: Add missing MODULE_DEVICE_TABLE This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this driver when it is built as an external module. Reported-by: Hulk Robot Signed-off-by: Zou Wei Link: https://lore.kernel.org/r/1620704673-104205-1-git-send-email-zou_wei@huawei.com Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support") Signed-off-by: Georgi Djakov --- drivers/interconnect/qcom/bcm-voter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/interconnect/qcom/bcm-voter.c b/drivers/interconnect/qcom/bcm-voter.c index 547f4c2593f41..8f385f9c2dd38 100644 --- a/drivers/interconnect/qcom/bcm-voter.c +++ b/drivers/interconnect/qcom/bcm-voter.c @@ -363,6 +363,7 @@ static const struct of_device_id bcm_voter_of_match[] = { { .compatible = "qcom,bcm-voter" }, { } }; +MODULE_DEVICE_TABLE(of, bcm_voter_of_match); static struct platform_driver qcom_icc_bcm_voter_driver = { .probe = qcom_icc_bcm_voter_probe, -- GitLab From 07adc0225484fc199e3dc15ec889f75f498c4fca Mon Sep 17 00:00:00 2001 From: Dinghao Liu Date: Mon, 12 Apr 2021 13:49:07 +0800 Subject: [PATCH 0312/3804] usb: cdns3: Fix runtime PM imbalance on error When cdns3_gadget_start() fails, a pairing PM usage counter decrement is needed to keep the counter balanced. Signed-off-by: Dinghao Liu Link: https://lore.kernel.org/r/20210412054908.7975-1-dinghao.liu@zju.edu.cn Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdns3-gadget.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index 9b1bd417cec03..a8b7b50abf645 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -3268,8 +3268,10 @@ static int __cdns3_gadget_init(struct cdns *cdns) pm_runtime_get_sync(cdns->dev); ret = cdns3_gadget_start(cdns); - if (ret) + if (ret) { + pm_runtime_put_sync(cdns->dev); return ret; + } /* * Because interrupt line can be shared with other components in -- GitLab From 3b414d1b0107fa51ad6063de9752d4b2a8063980 Mon Sep 17 00:00:00 2001 From: Pawel Laszczak Date: Tue, 20 Apr 2021 06:28:13 +0200 Subject: [PATCH 0313/3804] usb: cdnsp: Fix lack of removing request from pending list. Patch fixes lack of removing request from ep->pending_list on failure of the stop endpoint command. Driver even after failing this command must remove request from ep->pending_list. Without this fix driver can stuck in cdnsp_gadget_ep_disable function in loop: while (!list_empty(&pep->pending_list)) { preq = next_request(&pep->pending_list); cdnsp_ep_dequeue(pep, preq); } Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver") Signed-off-by: Pawel Laszczak Link: https://lore.kernel.org/r/20210420042813.34917-1-pawell@gli-login.cadence.com Signed-off-by: Peter Chen --- drivers/usb/cdns3/cdnsp-gadget.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/cdns3/cdnsp-gadget.c b/drivers/usb/cdns3/cdnsp-gadget.c index 56707b6b0f57c..c083985e387b2 100644 --- a/drivers/usb/cdns3/cdnsp-gadget.c +++ b/drivers/usb/cdns3/cdnsp-gadget.c @@ -422,17 +422,17 @@ unmap: int cdnsp_ep_dequeue(struct cdnsp_ep *pep, struct cdnsp_request *preq) { struct cdnsp_device *pdev = pep->pdev; - int ret; + int ret_stop = 0; + int ret_rem; trace_cdnsp_request_dequeue(preq); - if (GET_EP_CTX_STATE(pep->out_ctx) == EP_STATE_RUNNING) { - ret = cdnsp_cmd_stop_ep(pdev, pep); - if (ret) - return ret; - } + if (GET_EP_CTX_STATE(pep->out_ctx) == EP_STATE_RUNNING) + ret_stop = cdnsp_cmd_stop_ep(pdev, pep); + + ret_rem = cdnsp_remove_request(pdev, preq, pep); - return cdnsp_remove_request(pdev, preq, pep); + return ret_rem ? ret_rem : ret_stop; } static void cdnsp_zero_in_ctx(struct cdnsp_device *pdev) -- GitLab From bb4031b8af804244a7e4349d38f6624f68664bd6 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Mon, 26 Apr 2021 09:56:18 +0300 Subject: [PATCH 0314/3804] clk: Skip clk provider registration when np is NULL commit 6579c8d97ad7 ("clk: Mark fwnodes when their clock provider is added") revealed that clk/bcm/clk-raspberrypi.c driver calls devm_of_clk_add_hw_provider(), with a NULL dev->of_node, which resulted in a NULL pointer dereference in of_clk_add_hw_provider() when calling fwnode_dev_initialized(). Returning 0 is reducing the if conditions in driver code and is being consistent with the CONFIG_OF=n inline stub that returns 0 when CONFIG_OF is disabled. The downside is that drivers will maybe register clkdev lookups when they don't need to and waste some memory. Fixes: 6579c8d97ad7 ("clk: Mark fwnodes when their clock provider is added") Fixes: 3c9ea42802a1 ("clk: Mark fwnodes when their clock provider is added/removed") Reported-by: Marek Szyprowski Tested-by: Guenter Roeck Tested-by: Nathan Chancellor Reviewed-by: Stephen Boyd Reviewed-by: Saravana Kannan Reviewed-by: Nicolas Saenz Julienne Signed-off-by: Tudor Ambarus Link: https://lore.kernel.org/r/20210426065618.588144-1-tudor.ambarus@microchip.com Signed-off-by: Greg Kroah-Hartman --- drivers/clk/clk.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index e2ec1b7452439..65508eb89ec99 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -4540,6 +4540,9 @@ int of_clk_add_provider(struct device_node *np, struct of_clk_provider *cp; int ret; + if (!np) + return 0; + cp = kzalloc(sizeof(*cp), GFP_KERNEL); if (!cp) return -ENOMEM; @@ -4579,6 +4582,9 @@ int of_clk_add_hw_provider(struct device_node *np, struct of_clk_provider *cp; int ret; + if (!np) + return 0; + cp = kzalloc(sizeof(*cp), GFP_KERNEL); if (!cp) return -ENOMEM; @@ -4676,6 +4682,9 @@ void of_clk_del_provider(struct device_node *np) { struct of_clk_provider *cp; + if (!np) + return; + mutex_lock(&of_clk_mutex); list_for_each_entry(cp, &of_clk_providers, link) { if (cp->node == np) { -- GitLab From 049c4e13714ecbca567b4d5f6d563f05d431c80e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 10 May 2021 13:10:44 +0000 Subject: [PATCH 0315/3804] bpf: Fix alu32 const subreg bound tracking on bitwise operations Fix a bug in the verifier's scalar32_min_max_*() functions which leads to incorrect tracking of 32 bit bounds for the simulation of and/or/xor bitops. When both the src & dst subreg is a known constant, then the assumption is that scalar_min_max_*() will take care to update bounds correctly. However, this is not the case, for example, consider a register R2 which has a tnum of 0xffffffff00000000, meaning, lower 32 bits are known constant and in this case of value 0x00000001. R2 is then and'ed with a register R3 which is a 64 bit known constant, here, 0x100000002. What can be seen in line '10:' is that 32 bit bounds reach an invalid state where {u,s}32_min_value > {u,s}32_max_value. The reason is scalar32_min_max_*() delegates 32 bit bounds updates to scalar_min_max_*(), however, that really only takes place when both the 64 bit src & dst register is a known constant. Given scalar32_min_max_*() is intended to be designed as closely as possible to scalar_min_max_*(), update the 32 bit bounds in this situation through __mark_reg32_known() which will set all {u,s}32_{min,max}_value to the correct constant, which is 0x00000000 after the fix (given 0x00000001 & 0x00000002 in 32 bit space). This is possible given var32_off already holds the final value as dst_reg->var_off is updated before calling scalar32_min_max_*(). Before fix, invalid tracking of R2: [...] 9: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,smin_value=-9223372036854775807 (0x8000000000000001),smax_value=9223372032559808513 (0x7fffffff00000001),umin_value=1,umax_value=0xffffffff00000001,var_off=(0x1; 0xffffffff00000000),s32_min_value=1,s32_max_value=1,u32_min_value=1,u32_max_value=1) R3_w=inv4294967298 R10=fp0 9: (5f) r2 &= r3 10: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,smin_value=0,smax_value=4294967296 (0x100000000),umin_value=0,umax_value=0x100000000,var_off=(0x0; 0x100000000),s32_min_value=1,s32_max_value=0,u32_min_value=1,u32_max_value=0) R3_w=inv4294967298 R10=fp0 [...] After fix, correct tracking of R2: [...] 9: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,smin_value=-9223372036854775807 (0x8000000000000001),smax_value=9223372032559808513 (0x7fffffff00000001),umin_value=1,umax_value=0xffffffff00000001,var_off=(0x1; 0xffffffff00000000),s32_min_value=1,s32_max_value=1,u32_min_value=1,u32_max_value=1) R3_w=inv4294967298 R10=fp0 9: (5f) r2 &= r3 10: R0_w=inv1337 R1=ctx(id=0,off=0,imm=0) R2_w=inv(id=0,smin_value=0,smax_value=4294967296 (0x100000000),umin_value=0,umax_value=0x100000000,var_off=(0x0; 0x100000000),s32_min_value=0,s32_max_value=0,u32_min_value=0,u32_max_value=0) R3_w=inv4294967298 R10=fp0 [...] Fixes: 3f50f132d840 ("bpf: Verifier, do explicit ALU32 bounds tracking") Fixes: 2921c90d4718 ("bpf: Fix a verifier failure with xor") Reported-by: Manfred Paul (@_manfp) Reported-by: Thadeu Lima de Souza Cascardo Signed-off-by: Daniel Borkmann Reviewed-by: John Fastabend Acked-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 757476c91c984..9352a1b7de2dd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7084,11 +7084,10 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, s32 smin_val = src_reg->s32_min_value; u32 umax_val = src_reg->u32_max_value; - /* Assuming scalar64_min_max_and will be called so its safe - * to skip updating register for known 32-bit case. - */ - if (src_known && dst_known) + if (src_known && dst_known) { + __mark_reg32_known(dst_reg, var32_off.value); return; + } /* We get our minimum from the var_off, since that's inherently * bitwise. Our maximum is the minimum of the operands' maxima. @@ -7108,7 +7107,6 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, dst_reg->s32_min_value = dst_reg->u32_min_value; dst_reg->s32_max_value = dst_reg->u32_max_value; } - } static void scalar_min_max_and(struct bpf_reg_state *dst_reg, @@ -7155,11 +7153,10 @@ static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, s32 smin_val = src_reg->s32_min_value; u32 umin_val = src_reg->u32_min_value; - /* Assuming scalar64_min_max_or will be called so it is safe - * to skip updating register for known case. - */ - if (src_known && dst_known) + if (src_known && dst_known) { + __mark_reg32_known(dst_reg, var32_off.value); return; + } /* We get our maximum from the var_off, and our minimum is the * maximum of the operands' minima @@ -7224,11 +7221,10 @@ static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg, struct tnum var32_off = tnum_subreg(dst_reg->var_off); s32 smin_val = src_reg->s32_min_value; - /* Assuming scalar64_min_max_xor will be called so it is safe - * to skip updating register for known case. - */ - if (src_known && dst_known) + if (src_known && dst_known) { + __mark_reg32_known(dst_reg, var32_off.value); return; + } /* We get both minimum and maximum from the var32_off. */ dst_reg->u32_min_value = var32_off.value; -- GitLab From 2515dd6ce8e545b0b2eece84920048ef9ed846c4 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 19 Apr 2021 16:17:41 -0700 Subject: [PATCH 0316/3804] stack: Replace "o" output with "r" input constraint "o" isn't a common asm() constraint to use; it triggers an assertion in assert-enabled builds of LLVM that it's not recognized when targeting aarch64 (though it appears to fall back to "m"). It's fixed in LLVM 13 now, but there isn't really a good reason to use "o" in particular here. To avoid causing build issues for those using assert-enabled builds of earlier LLVM versions, the constraint needs changing. Instead, if the point is to retain the __builtin_alloca(), make ptr appear to "escape" via being an input to an empty inline asm block. This is preferable anyways, since otherwise this looks like a dead store. While the use of "r" was considered in https://lore.kernel.org/lkml/202104011447.2E7F543@keescook/ it was only tested as an output (which looks like a dead store, and wasn't sufficient). Use "r" as an input constraint instead, which behaves correctly across compilers and architectures. Fixes: 39218ff4c625 ("stack: Optionally randomize kernel stack offset each syscall") Signed-off-by: Nick Desaulniers Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Tested-by: Kees Cook Tested-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Link: https://reviews.llvm.org/D100412 Link: https://bugs.llvm.org/show_bug.cgi?id=49956 Link: https://lore.kernel.org/r/20210419231741.4084415-1-keescook@chromium.org --- include/linux/randomize_kstack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h index fd80fab663a96..bebc911161b6f 100644 --- a/include/linux/randomize_kstack.h +++ b/include/linux/randomize_kstack.h @@ -38,7 +38,7 @@ void *__builtin_alloca(size_t size); u32 offset = raw_cpu_read(kstack_offset); \ u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset)); \ /* Keep allocation even after "ptr" loses scope. */ \ - asm volatile("" : "=o"(*ptr) :: "memory"); \ + asm volatile("" :: "r"(ptr) : "memory"); \ } \ } while (0) -- GitLab From cc2520909c2df9ad51d642bf09b3da26a9f56393 Mon Sep 17 00:00:00 2001 From: Jernej Skrabec Date: Wed, 5 May 2021 19:33:35 +0200 Subject: [PATCH 0317/3804] MAINTAINERS: Update my e-mail Old e-mail address doesn't work anymore, update it to new one. Link: https://lore.kernel.org/r/20210505173335.1483575-1-jernej.skrabec@gmail.com Signed-off-by: Jernej Skrabec Signed-off-by: Maxime Ripard --- .mailmap | 1 + MAINTAINERS | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.mailmap b/.mailmap index 2d93232ed72b8..ca235ef4755f3 100644 --- a/.mailmap +++ b/.mailmap @@ -159,6 +159,7 @@ Jeff Layton Jeff Layton Jens Axboe Jens Osterkamp +Jernej Skrabec Jiri Slaby Jiri Slaby Jiri Slaby diff --git a/MAINTAINERS b/MAINTAINERS index 7fdc513392f45..2e9063d018d0e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1572,7 +1572,7 @@ F: drivers/clk/sunxi/ ARM/Allwinner sunXi SoC support M: Maxime Ripard M: Chen-Yu Tsai -R: Jernej Skrabec +R: Jernej Skrabec L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux.git @@ -5003,7 +5003,7 @@ S: Maintained F: drivers/net/fddi/defza.* DEINTERLACE DRIVERS FOR ALLWINNER H3 -M: Jernej Skrabec +M: Jernej Skrabec L: linux-media@vger.kernel.org S: Maintained T: git git://linuxtv.org/media_tree.git @@ -5527,7 +5527,7 @@ F: include/linux/power/smartreflex.h DRM DRIVER FOR ALLWINNER DE2 AND DE3 ENGINE M: Maxime Ripard M: Chen-Yu Tsai -R: Jernej Skrabec +R: Jernej Skrabec L: dri-devel@lists.freedesktop.org S: Supported T: git git://anongit.freedesktop.org/drm/drm-misc @@ -5903,7 +5903,7 @@ M: Andrzej Hajda M: Neil Armstrong R: Laurent Pinchart R: Jonas Karlman -R: Jernej Skrabec +R: Jernej Skrabec S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/bridge/ @@ -15490,7 +15490,7 @@ F: include/uapi/linux/rose.h F: net/rose/ ROTATION DRIVER FOR ALLWINNER A83T -M: Jernej Skrabec +M: Jernej Skrabec L: linux-media@vger.kernel.org S: Maintained T: git git://linuxtv.org/media_tree.git -- GitLab From ab77fe8935c57d1339d3df64957f32e87f0d5ef3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 15:41:27 +0300 Subject: [PATCH 0318/3804] spi: pxa2xx: Introduce int_stop_and_reset() helper Currently we have three times the same few lines repeated in the code. Deduplicate them by newly introduced int_stop_and_reset() helper. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510124134.24638-8-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 1d4c7f4217ede..5572eec683819 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -620,13 +620,20 @@ static void reset_sccr1(struct driver_data *drv_data) pxa2xx_spi_write(drv_data, SSCR1, sccr1_reg); } -static void int_error_stop(struct driver_data *drv_data, const char *msg) +static void int_stop_and_reset(struct driver_data *drv_data) { - /* Stop and reset SSP */ + /* Clear and disable interrupts */ write_SSSR_CS(drv_data, drv_data->clear_sr); reset_sccr1(drv_data); - if (!pxa25x_ssp_comp(drv_data)) - pxa2xx_spi_write(drv_data, SSTO, 0); + if (pxa25x_ssp_comp(drv_data)) + return; + + pxa2xx_spi_write(drv_data, SSTO, 0); +} + +static void int_error_stop(struct driver_data *drv_data, const char *msg) +{ + int_stop_and_reset(drv_data); pxa2xx_spi_flush(drv_data); pxa2xx_spi_off(drv_data); @@ -638,11 +645,7 @@ static void int_error_stop(struct driver_data *drv_data, const char *msg) static void int_transfer_complete(struct driver_data *drv_data) { - /* Clear and disable interrupts */ - write_SSSR_CS(drv_data, drv_data->clear_sr); - reset_sccr1(drv_data); - if (!pxa25x_ssp_comp(drv_data)) - pxa2xx_spi_write(drv_data, SSTO, 0); + int_stop_and_reset(drv_data); spi_finalize_current_transfer(drv_data->controller); } @@ -1151,11 +1154,7 @@ static int pxa2xx_spi_slave_abort(struct spi_controller *controller) { struct driver_data *drv_data = spi_controller_get_devdata(controller); - /* Stop and reset SSP */ - write_SSSR_CS(drv_data, drv_data->clear_sr); - reset_sccr1(drv_data); - if (!pxa25x_ssp_comp(drv_data)) - pxa2xx_spi_write(drv_data, SSTO, 0); + int_stop_and_reset(drv_data); pxa2xx_spi_flush(drv_data); pxa2xx_spi_off(drv_data); -- GitLab From 4761d2e7e51cfbe6fdb4e95903d407927f519f50 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 15:41:28 +0300 Subject: [PATCH 0319/3804] spi: pxa2xx: Reuse int_error_stop() in pxa2xx_spi_slave_abort() It appears that pxa2xx_spi_slave_abort()almost repeats the functionality of the int_error_stop(). Reuse int_error_stop() in pxa2xx_spi_slave_abort(). Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510124134.24638-9-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 5572eec683819..087c84e605b94 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -631,7 +631,7 @@ static void int_stop_and_reset(struct driver_data *drv_data) pxa2xx_spi_write(drv_data, SSTO, 0); } -static void int_error_stop(struct driver_data *drv_data, const char *msg) +static void int_error_stop(struct driver_data *drv_data, const char *msg, int err) { int_stop_and_reset(drv_data); pxa2xx_spi_flush(drv_data); @@ -639,7 +639,7 @@ static void int_error_stop(struct driver_data *drv_data, const char *msg) dev_err(drv_data->ssp->dev, "%s\n", msg); - drv_data->controller->cur_msg->status = -EIO; + drv_data->controller->cur_msg->status = err; spi_finalize_current_transfer(drv_data->controller); } @@ -658,12 +658,12 @@ static irqreturn_t interrupt_transfer(struct driver_data *drv_data) u32 irq_status = pxa2xx_spi_read(drv_data, SSSR) & irq_mask; if (irq_status & SSSR_ROR) { - int_error_stop(drv_data, "interrupt_transfer: fifo overrun"); + int_error_stop(drv_data, "interrupt_transfer: fifo overrun", -EIO); return IRQ_HANDLED; } if (irq_status & SSSR_TUR) { - int_error_stop(drv_data, "interrupt_transfer: fifo underrun"); + int_error_stop(drv_data, "interrupt_transfer: fifo underrun", -EIO); return IRQ_HANDLED; } @@ -1154,14 +1154,7 @@ static int pxa2xx_spi_slave_abort(struct spi_controller *controller) { struct driver_data *drv_data = spi_controller_get_devdata(controller); - int_stop_and_reset(drv_data); - pxa2xx_spi_flush(drv_data); - pxa2xx_spi_off(drv_data); - - dev_dbg(drv_data->ssp->dev, "transfer aborted\n"); - - drv_data->controller->cur_msg->status = -EINTR; - spi_finalize_current_transfer(drv_data->controller); + int_error_stop(drv_data, "transfer aborted", -EINTR); return 0; } -- GitLab From 0c8ccd8b267fc735e4621774ce62728f27d42863 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 15:41:29 +0300 Subject: [PATCH 0320/3804] spi: pxa2xx: Use pxa_ssp_enable()/pxa_ssp_disable() in the driver There are few places that repeat the logic of pxa_ssp_enable() and pxa_ssp_disable(). Use them instead of open coded variants. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510124134.24638-10-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-dma.c | 4 +--- drivers/spi/spi-pxa2xx.c | 36 ++++++++++++++++++------------------ include/linux/pxa2xx_ssp.h | 16 ++++++++++++++++ sound/soc/pxa/pxa-ssp.c | 16 ---------------- 4 files changed, 35 insertions(+), 37 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c index e00dbadd39ecb..5ca01ad7f4604 100644 --- a/drivers/spi/spi-pxa2xx-dma.c +++ b/drivers/spi/spi-pxa2xx-dma.c @@ -50,9 +50,7 @@ static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data, if (error) { /* In case we got an error we disable the SSP now */ - pxa2xx_spi_write(drv_data, SSCR0, - pxa2xx_spi_read(drv_data, SSCR0) - & ~SSCR0_SSE); + pxa_ssp_disable(drv_data->ssp); msg->status = -EIO; } diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 087c84e605b94..a27f51f5db65f 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -286,13 +286,11 @@ static u32 pxa2xx_configure_sscr0(const struct driver_data *drv_data, case QUARK_X1000_SSP: return clk_div | QUARK_X1000_SSCR0_Motorola - | QUARK_X1000_SSCR0_DataSize(bits > 32 ? 8 : bits) - | SSCR0_SSE; + | QUARK_X1000_SSCR0_DataSize(bits > 32 ? 8 : bits); default: return clk_div | SSCR0_Motorola | SSCR0_DataSize(bits > 16 ? bits - 16 : bits) - | SSCR0_SSE | (bits > 16 ? SSCR0_EDSS : 0); } } @@ -498,8 +496,7 @@ static void pxa2xx_spi_off(struct driver_data *drv_data) if (is_mmp2_ssp(drv_data)) return; - pxa2xx_spi_write(drv_data, SSCR0, - pxa2xx_spi_read(drv_data, SSCR0) & ~SSCR0_SSE); + pxa_ssp_disable(drv_data->ssp); } static int null_writer(struct driver_data *drv_data) @@ -1098,25 +1095,26 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller, (pxa2xx_spi_read(drv_data, DDS_RATE) != chip->dds_rate)) pxa2xx_spi_write(drv_data, DDS_RATE, chip->dds_rate); + /* Stop the SSP */ + if (!is_mmp2_ssp(drv_data)) + pxa_ssp_disable(drv_data->ssp); + + if (!pxa25x_ssp_comp(drv_data)) + pxa2xx_spi_write(drv_data, SSTO, chip->timeout); + /* see if we need to reload the config registers */ if ((pxa2xx_spi_read(drv_data, SSCR0) != cr0) || (pxa2xx_spi_read(drv_data, SSCR1) & change_mask) != (cr1 & change_mask)) { - /* stop the SSP, and update the other bits */ - if (!is_mmp2_ssp(drv_data)) - pxa2xx_spi_write(drv_data, SSCR0, cr0 & ~SSCR0_SSE); - if (!pxa25x_ssp_comp(drv_data)) - pxa2xx_spi_write(drv_data, SSTO, chip->timeout); /* first set CR1 without interrupt and service enables */ pxa2xx_spi_write(drv_data, SSCR1, cr1 & change_mask); - /* restart the SSP */ + /* Update the other bits */ pxa2xx_spi_write(drv_data, SSCR0, cr0); - - } else { - if (!pxa25x_ssp_comp(drv_data)) - pxa2xx_spi_write(drv_data, SSTO, chip->timeout); } + /* Restart the SSP */ + pxa_ssp_enable(drv_data->ssp); + if (is_mmp2_ssp(drv_data)) { u8 tx_level = (pxa2xx_spi_read(drv_data, SSSR) & SSSR_TFL_MASK) >> 8; @@ -1786,8 +1784,9 @@ static int pxa2xx_spi_probe(struct platform_device *pdev) controller->min_speed_hz = DIV_ROUND_UP(controller->max_speed_hz, 512); + pxa_ssp_disable(ssp); + /* Load default SSP configuration */ - pxa2xx_spi_write(drv_data, SSCR0, 0); switch (drv_data->ssp_type) { case QUARK_X1000_SSP: tmp = QUARK_X1000_SSCR1_RxTresh(RX_THRESH_QUARK_X1000_DFLT) | @@ -1928,7 +1927,7 @@ static int pxa2xx_spi_remove(struct platform_device *pdev) spi_unregister_controller(drv_data->controller); /* Disable the SSP at the peripheral and SOC level */ - pxa2xx_spi_write(drv_data, SSCR0, 0); + pxa_ssp_disable(ssp); clk_disable_unprepare(ssp->clk); /* Release DMA */ @@ -1957,7 +1956,8 @@ static int pxa2xx_spi_suspend(struct device *dev) status = spi_controller_suspend(drv_data->controller); if (status != 0) return status; - pxa2xx_spi_write(drv_data, SSCR0, 0); + + pxa_ssp_disable(ssp); if (!pm_runtime_suspended(dev)) clk_disable_unprepare(ssp->clk); diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 1b6c1a0922bd2..fdfbe17e15f46 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -254,6 +254,22 @@ static inline u32 pxa_ssp_read_reg(struct ssp_device *dev, u32 reg) return __raw_readl(dev->mmio_base + reg); } +static inline void pxa_ssp_enable(struct ssp_device *ssp) +{ + u32 sscr0; + + sscr0 = pxa_ssp_read_reg(ssp, SSCR0) | SSCR0_SSE; + pxa_ssp_write_reg(ssp, SSCR0, sscr0); +} + +static inline void pxa_ssp_disable(struct ssp_device *ssp) +{ + u32 sscr0; + + sscr0 = pxa_ssp_read_reg(ssp, SSCR0) & ~SSCR0_SSE; + pxa_ssp_write_reg(ssp, SSCR0, sscr0); +} + #if IS_ENABLED(CONFIG_PXA_SSP) struct ssp_device *pxa_ssp_request(int port, const char *label); void pxa_ssp_free(struct ssp_device *); diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index b941adcbb8f95..939e7e28486aa 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -61,22 +61,6 @@ static void dump_registers(struct ssp_device *ssp) pxa_ssp_read_reg(ssp, SSACD)); } -static void pxa_ssp_enable(struct ssp_device *ssp) -{ - uint32_t sscr0; - - sscr0 = __raw_readl(ssp->mmio_base + SSCR0) | SSCR0_SSE; - __raw_writel(sscr0, ssp->mmio_base + SSCR0); -} - -static void pxa_ssp_disable(struct ssp_device *ssp) -{ - uint32_t sscr0; - - sscr0 = __raw_readl(ssp->mmio_base + SSCR0) & ~SSCR0_SSE; - __raw_writel(sscr0, ssp->mmio_base + SSCR0); -} - static void pxa_ssp_set_dma_params(struct ssp_device *ssp, int width4, int out, struct snd_dmaengine_dai_dma_data *dma) { -- GitLab From 1bed378c6b9116c51ae59b970cf3d9b4e9e62ced Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 15:41:30 +0300 Subject: [PATCH 0321/3804] spi: pxa2xx: Extract pxa2xx_spi_update() helper There are few places that repeat the logic of "update if changed". Extract pxa2xx_spi_update() helper to deduplicate that. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510124134.24638-11-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index a27f51f5db65f..54eaa048651fa 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -200,6 +200,12 @@ static bool is_mmp2_ssp(const struct driver_data *drv_data) return drv_data->ssp_type == MMP2_SSP; } +static void pxa2xx_spi_update(const struct driver_data *drv_data, u32 reg, u32 mask, u32 value) +{ + if ((pxa2xx_spi_read(drv_data, reg) & mask) != value) + pxa2xx_spi_write(drv_data, reg, value & mask); +} + static u32 pxa2xx_spi_get_ssrc1_change_mask(const struct driver_data *drv_data) { switch (drv_data->ssp_type) { @@ -1081,19 +1087,12 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller, dma_mapped ? "DMA" : "PIO"); if (is_lpss_ssp(drv_data)) { - if ((pxa2xx_spi_read(drv_data, SSIRF) & 0xff) - != chip->lpss_rx_threshold) - pxa2xx_spi_write(drv_data, SSIRF, - chip->lpss_rx_threshold); - if ((pxa2xx_spi_read(drv_data, SSITF) & 0xffff) - != chip->lpss_tx_threshold) - pxa2xx_spi_write(drv_data, SSITF, - chip->lpss_tx_threshold); + pxa2xx_spi_update(drv_data, SSIRF, GENMASK(7, 0), chip->lpss_rx_threshold); + pxa2xx_spi_update(drv_data, SSITF, GENMASK(15, 0), chip->lpss_tx_threshold); } - if (is_quark_x1000_ssp(drv_data) && - (pxa2xx_spi_read(drv_data, DDS_RATE) != chip->dds_rate)) - pxa2xx_spi_write(drv_data, DDS_RATE, chip->dds_rate); + if (is_quark_x1000_ssp(drv_data)) + pxa2xx_spi_update(drv_data, DDS_RATE, GENMASK(23, 0), chip->dds_rate); /* Stop the SSP */ if (!is_mmp2_ssp(drv_data)) @@ -1102,15 +1101,11 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller, if (!pxa25x_ssp_comp(drv_data)) pxa2xx_spi_write(drv_data, SSTO, chip->timeout); + /* first set CR1 without interrupt and service enables */ + pxa2xx_spi_update(drv_data, SSCR1, change_mask, cr1); + /* see if we need to reload the config registers */ - if ((pxa2xx_spi_read(drv_data, SSCR0) != cr0) - || (pxa2xx_spi_read(drv_data, SSCR1) & change_mask) - != (cr1 & change_mask)) { - /* first set CR1 without interrupt and service enables */ - pxa2xx_spi_write(drv_data, SSCR1, cr1 & change_mask); - /* Update the other bits */ - pxa2xx_spi_write(drv_data, SSCR0, cr0); - } + pxa2xx_spi_update(drv_data, SSCR0, GENMASK(31, 0), cr0); /* Restart the SSP */ pxa_ssp_enable(drv_data->ssp); -- GitLab From 42c80cd439a938569a86f6ae135d38c1cda5569b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 15:41:31 +0300 Subject: [PATCH 0322/3804] spi: pxa2xx: Extract clear_SSCR1_bits() helper There are few places that repeat the logic of "clear some bits in SSCR1". Extract clear_SSCR1_bits() helper to deduplicate that. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510124134.24638-12-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-dma.c | 4 +--- drivers/spi/spi-pxa2xx.c | 7 ++----- drivers/spi/spi-pxa2xx.h | 5 +++++ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c index 5ca01ad7f4604..e581027e99f9f 100644 --- a/drivers/spi/spi-pxa2xx-dma.c +++ b/drivers/spi/spi-pxa2xx-dma.c @@ -41,9 +41,7 @@ static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data, } /* Clear status & disable interrupts */ - pxa2xx_spi_write(drv_data, SSCR1, - pxa2xx_spi_read(drv_data, SSCR1) - & ~drv_data->dma_cr1); + clear_SSCR1_bits(drv_data, drv_data->dma_cr1); write_SSSR_CS(drv_data, drv_data->clear_sr); if (!pxa25x_ssp_comp(drv_data)) pxa2xx_spi_write(drv_data, SSTO, 0); diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 54eaa048651fa..3a4ad16614f7a 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -733,8 +733,7 @@ static irqreturn_t interrupt_transfer(struct driver_data *drv_data) static void handle_bad_msg(struct driver_data *drv_data) { pxa2xx_spi_off(drv_data); - pxa2xx_spi_write(drv_data, SSCR1, - pxa2xx_spi_read(drv_data, SSCR1) & ~drv_data->int_cr1); + clear_SSCR1_bits(drv_data, drv_data->int_cr1); if (!pxa25x_ssp_comp(drv_data)) pxa2xx_spi_write(drv_data, SSTO, 0); write_SSSR_CS(drv_data, drv_data->clear_sr); @@ -1161,9 +1160,7 @@ static void pxa2xx_spi_handle_err(struct spi_controller *controller, pxa2xx_spi_off(drv_data); /* Clear and disable interrupts and service requests */ write_SSSR_CS(drv_data, drv_data->clear_sr); - pxa2xx_spi_write(drv_data, SSCR1, - pxa2xx_spi_read(drv_data, SSCR1) - & ~(drv_data->int_cr1 | drv_data->dma_cr1)); + clear_SSCR1_bits(drv_data, drv_data->int_cr1 | drv_data->dma_cr1); if (!pxa25x_ssp_comp(drv_data)) pxa2xx_spi_write(drv_data, SSTO, 0); diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h index 739e264feaa69..ed63f7165cd8e 100644 --- a/drivers/spi/spi-pxa2xx.h +++ b/drivers/spi/spi-pxa2xx.h @@ -105,6 +105,11 @@ static inline int pxa25x_ssp_comp(struct driver_data *drv_data) } } +static inline void clear_SSCR1_bits(const struct driver_data *drv_data, u32 bits) +{ + pxa2xx_spi_write(drv_data, SSCR1, pxa2xx_spi_read(drv_data, SSCR1) & ~bits); +} + static inline void write_SSSR_CS(struct driver_data *drv_data, u32 val) { if (drv_data->ssp_type == CE4100_SSP || -- GitLab From 6d380132eaea536bef641f21847c8a7987e96ad8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 15:41:32 +0300 Subject: [PATCH 0323/3804] spi: pxa2xx: Extract read_SSSR_bits() helper There are few places that repeat the logic of "read some bits from SSSR". Extract read_SSSR_bits() helper to deduplicate that. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510124134.24638-13-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-dma.c | 9 +++------ drivers/spi/spi-pxa2xx.c | 26 +++++++++++--------------- drivers/spi/spi-pxa2xx.h | 7 ++++++- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-dma.c b/drivers/spi/spi-pxa2xx-dma.c index e581027e99f9f..f022d82dcb1bf 100644 --- a/drivers/spi/spi-pxa2xx-dma.c +++ b/drivers/spi/spi-pxa2xx-dma.c @@ -34,11 +34,8 @@ static void pxa2xx_spi_dma_transfer_complete(struct driver_data *drv_data, * might not know about the error yet. So we re-check the * ROR bit here before we clear the status register. */ - if (!error) { - u32 status = pxa2xx_spi_read(drv_data, SSSR) - & drv_data->mask_sr; - error = status & SSSR_ROR; - } + if (!error) + error = read_SSSR_bits(drv_data, drv_data->mask_sr) & SSSR_ROR; /* Clear status & disable interrupts */ clear_SSCR1_bits(drv_data, drv_data->dma_cr1); @@ -119,7 +116,7 @@ irqreturn_t pxa2xx_spi_dma_transfer(struct driver_data *drv_data) { u32 status; - status = pxa2xx_spi_read(drv_data, SSSR) & drv_data->mask_sr; + status = read_SSSR_bits(drv_data, drv_data->mask_sr); if (status & SSSR_ROR) { dev_err(drv_data->ssp->dev, "FIFO overrun\n"); diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 3a4ad16614f7a..af3f01de8f5b1 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -247,7 +247,7 @@ static bool pxa2xx_spi_txfifo_full(const struct driver_data *drv_data) break; } - return (pxa2xx_spi_read(drv_data, SSSR) & mask) == mask; + return read_SSSR_bits(drv_data, mask) == mask; } static void pxa2xx_spi_clear_rx_thre(const struct driver_data *drv_data, @@ -488,7 +488,7 @@ int pxa2xx_spi_flush(struct driver_data *drv_data) unsigned long limit = loops_per_jiffy << 1; do { - while (pxa2xx_spi_read(drv_data, SSSR) & SSSR_RNE) + while (read_SSSR_bits(drv_data, SSSR_RNE)) pxa2xx_spi_read(drv_data, SSDR); } while ((pxa2xx_spi_read(drv_data, SSSR) & SSSR_BSY) && --limit); write_SSSR_CS(drv_data, SSSR_ROR); @@ -523,8 +523,7 @@ static int null_reader(struct driver_data *drv_data) { u8 n_bytes = drv_data->n_bytes; - while ((pxa2xx_spi_read(drv_data, SSSR) & SSSR_RNE) - && (drv_data->rx < drv_data->rx_end)) { + while (read_SSSR_bits(drv_data, SSSR_RNE) && drv_data->rx < drv_data->rx_end) { pxa2xx_spi_read(drv_data, SSDR); drv_data->rx += n_bytes; } @@ -546,8 +545,7 @@ static int u8_writer(struct driver_data *drv_data) static int u8_reader(struct driver_data *drv_data) { - while ((pxa2xx_spi_read(drv_data, SSSR) & SSSR_RNE) - && (drv_data->rx < drv_data->rx_end)) { + while (read_SSSR_bits(drv_data, SSSR_RNE) && drv_data->rx < drv_data->rx_end) { *(u8 *)(drv_data->rx) = pxa2xx_spi_read(drv_data, SSDR); ++drv_data->rx; } @@ -569,8 +567,7 @@ static int u16_writer(struct driver_data *drv_data) static int u16_reader(struct driver_data *drv_data) { - while ((pxa2xx_spi_read(drv_data, SSSR) & SSSR_RNE) - && (drv_data->rx < drv_data->rx_end)) { + while (read_SSSR_bits(drv_data, SSSR_RNE) && drv_data->rx < drv_data->rx_end) { *(u16 *)(drv_data->rx) = pxa2xx_spi_read(drv_data, SSDR); drv_data->rx += 2; } @@ -592,8 +589,7 @@ static int u32_writer(struct driver_data *drv_data) static int u32_reader(struct driver_data *drv_data) { - while ((pxa2xx_spi_read(drv_data, SSSR) & SSSR_RNE) - && (drv_data->rx < drv_data->rx_end)) { + while (read_SSSR_bits(drv_data, SSSR_RNE) && drv_data->rx < drv_data->rx_end) { *(u32 *)(drv_data->rx) = pxa2xx_spi_read(drv_data, SSDR); drv_data->rx += 4; } @@ -655,10 +651,11 @@ static void int_transfer_complete(struct driver_data *drv_data) static irqreturn_t interrupt_transfer(struct driver_data *drv_data) { - u32 irq_mask = (pxa2xx_spi_read(drv_data, SSCR1) & SSCR1_TIE) ? - drv_data->mask_sr : drv_data->mask_sr & ~SSSR_TFS; + u32 irq_status; - u32 irq_status = pxa2xx_spi_read(drv_data, SSSR) & irq_mask; + irq_status = read_SSSR_bits(drv_data, drv_data->mask_sr); + if (!(pxa2xx_spi_read(drv_data, SSCR1) & SSCR1_TIE)) + irq_status &= ~SSSR_TFS; if (irq_status & SSSR_ROR) { int_error_stop(drv_data, "interrupt_transfer: fifo overrun", -EIO); @@ -1110,8 +1107,7 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller, pxa_ssp_enable(drv_data->ssp); if (is_mmp2_ssp(drv_data)) { - u8 tx_level = (pxa2xx_spi_read(drv_data, SSSR) - & SSSR_TFL_MASK) >> 8; + u8 tx_level = read_SSSR_bits(drv_data, SSSR_TFL_MASK) >> 8; if (tx_level) { /* On MMP2, flipping SSE doesn't to empty TXFIFO. */ diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h index ed63f7165cd8e..d2cb40f97c4b7 100644 --- a/drivers/spi/spi-pxa2xx.h +++ b/drivers/spi/spi-pxa2xx.h @@ -110,11 +110,16 @@ static inline void clear_SSCR1_bits(const struct driver_data *drv_data, u32 bits pxa2xx_spi_write(drv_data, SSCR1, pxa2xx_spi_read(drv_data, SSCR1) & ~bits); } +static inline u32 read_SSSR_bits(const struct driver_data *drv_data, u32 bits) +{ + return pxa2xx_spi_read(drv_data, SSSR) & bits; +} + static inline void write_SSSR_CS(struct driver_data *drv_data, u32 val) { if (drv_data->ssp_type == CE4100_SSP || drv_data->ssp_type == QUARK_X1000_SSP) - val |= pxa2xx_spi_read(drv_data, SSSR) & SSSR_ALT_FRM_MASK; + val |= read_SSSR_bits(drv_data, SSSR_ALT_FRM_MASK); pxa2xx_spi_write(drv_data, SSSR, val); } -- GitLab From eca32c3974c0664f88fed90b327f473bd18a4809 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 15:41:33 +0300 Subject: [PATCH 0324/3804] spi: pxa2xx: Constify struct driver_data parameter In a couple of functions the contents of struct driver_data are not altered, hence we may constify the respective function parameter. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510124134.24638-14-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h index d2cb40f97c4b7..5c6a5e0f249e9 100644 --- a/drivers/spi/spi-pxa2xx.h +++ b/drivers/spi/spi-pxa2xx.h @@ -93,7 +93,7 @@ static inline void pxa2xx_spi_write(const struct driver_data *drv_data, u32 reg, #define DMA_ALIGNMENT 8 -static inline int pxa25x_ssp_comp(struct driver_data *drv_data) +static inline int pxa25x_ssp_comp(const struct driver_data *drv_data) { switch (drv_data->ssp_type) { case PXA25x_SSP: @@ -115,7 +115,7 @@ static inline u32 read_SSSR_bits(const struct driver_data *drv_data, u32 bits) return pxa2xx_spi_read(drv_data, SSSR) & bits; } -static inline void write_SSSR_CS(struct driver_data *drv_data, u32 val) +static inline void write_SSSR_CS(const struct driver_data *drv_data, u32 val) { if (drv_data->ssp_type == CE4100_SSP || drv_data->ssp_type == QUARK_X1000_SSP) -- GitLab From 3fdb59cf10b020b32b9f1dfc78611320623dcb3e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 15:41:34 +0300 Subject: [PATCH 0325/3804] spi: pxa2xx: Introduce special type for Merrifield SPIs Intel Merrifield SPI is actually more closer to PXA3xx. It has extended FIFO (32 bytes) and additional registers to get or set FIFO thresholds. Introduce new type for Intel Merrifield SPI host controllers and handle bigger FIFO size. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510124134.24638-15-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-pxa2xx-pci.c | 2 +- drivers/spi/spi-pxa2xx.c | 32 +++++++++++++++++++++++++++++--- include/linux/pxa2xx_ssp.h | 16 ++++++++++++++++ 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index a259be12d3265..dce9ade9a4dfb 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -179,7 +179,7 @@ static struct pxa_spi_info spi_info_configs[] = { .rx_param = &bsw2_rx_param, }, [PORT_MRFLD] = { - .type = PXA27x_SSP, + .type = MRFLD_SSP, .max_clk_rate = 25000000, .setup = mrfld_spi_setup, }, diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index af3f01de8f5b1..5985b39e2dd60 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -200,6 +200,11 @@ static bool is_mmp2_ssp(const struct driver_data *drv_data) return drv_data->ssp_type == MMP2_SSP; } +static bool is_mrfld_ssp(const struct driver_data *drv_data) +{ + return drv_data->ssp_type == MRFLD_SSP; +} + static void pxa2xx_spi_update(const struct driver_data *drv_data, u32 reg, u32 mask, u32 value) { if ((pxa2xx_spi_read(drv_data, reg) & mask) != value) @@ -1087,6 +1092,15 @@ static int pxa2xx_spi_transfer_one(struct spi_controller *controller, pxa2xx_spi_update(drv_data, SSITF, GENMASK(15, 0), chip->lpss_tx_threshold); } + if (is_mrfld_ssp(drv_data)) { + u32 thresh = 0; + + thresh |= SFIFOTT_RxThresh(chip->lpss_rx_threshold); + thresh |= SFIFOTT_TxThresh(chip->lpss_tx_threshold); + + pxa2xx_spi_update(drv_data, SFIFOTT, 0xffffffff, thresh); + } + if (is_quark_x1000_ssp(drv_data)) pxa2xx_spi_update(drv_data, DDS_RATE, GENMASK(23, 0), chip->dds_rate); @@ -1253,6 +1267,11 @@ static int setup(struct spi_device *spi) tx_hi_thres = 0; rx_thres = RX_THRESH_QUARK_X1000_DFLT; break; + case MRFLD_SSP: + tx_thres = TX_THRESH_MRFLD_DFLT; + tx_hi_thres = 0; + rx_thres = RX_THRESH_MRFLD_DFLT; + break; case CE4100_SSP: tx_thres = TX_THRESH_CE4100_DFLT; tx_hi_thres = 0; @@ -1328,9 +1347,16 @@ static int setup(struct spi_device *spi) chip->cr1 |= SSCR1_SPH; } - chip->lpss_rx_threshold = SSIRF_RxThresh(rx_thres); - chip->lpss_tx_threshold = SSITF_TxLoThresh(tx_thres) - | SSITF_TxHiThresh(tx_hi_thres); + if (is_lpss_ssp(drv_data)) { + chip->lpss_rx_threshold = SSIRF_RxThresh(rx_thres); + chip->lpss_tx_threshold = SSITF_TxLoThresh(tx_thres) | + SSITF_TxHiThresh(tx_hi_thres); + } + + if (is_mrfld_ssp(drv_data)) { + chip->lpss_rx_threshold = rx_thres; + chip->lpss_tx_threshold = tx_thres; + } /* set dma burst and threshold outside of chip_info path so that if * chip_info goes away after setting chip->enable_dma, the diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index fdfbe17e15f46..2b21bc1f3c732 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -183,6 +183,21 @@ struct device_node; #define SSACD_ACPS(x) ((x) << 4) /* Audio clock PLL select */ #define SSACD_SCDX8 BIT(7) /* SYSCLK division ratio select */ +/* Intel Merrifield SSP */ +#define SFIFOL 0x68 /* FIFO level */ +#define SFIFOTT 0x6c /* FIFO trigger threshold */ + +#define RX_THRESH_MRFLD_DFLT 16 +#define TX_THRESH_MRFLD_DFLT 16 + +#define SFIFOL_TFL_MASK GENMASK(15, 0) /* Transmit FIFO Level mask */ +#define SFIFOL_RFL_MASK GENMASK(31, 16) /* Receive FIFO Level mask */ + +#define SFIFOTT_TFT GENMASK(15, 0) /* Transmit FIFO Threshold (mask) */ +#define SFIFOTT_TxThresh(x) (((x) - 1) << 0) /* TX FIFO trigger threshold / level */ +#define SFIFOTT_RFT GENMASK(31, 16) /* Receive FIFO Threshold (mask) */ +#define SFIFOTT_RxThresh(x) (((x) - 1) << 16) /* RX FIFO trigger threshold / level */ + /* LPSS SSP */ #define SSITF 0x44 /* TX FIFO trigger level */ #define SSITF_TxHiThresh(x) (((x) - 1) << 0) @@ -205,6 +220,7 @@ enum pxa_ssp_type { MMP2_SSP, PXA910_SSP, CE4100_SSP, + MRFLD_SSP, QUARK_X1000_SSP, LPSS_LPT_SSP, /* Keep LPSS types sorted with lpss_platforms[] */ LPSS_BYT_SSP, -- GitLab From 1b55767dfdd93c42712e67e986ac14f0c4debd0c Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 11 May 2021 00:25:05 +0800 Subject: [PATCH 0326/3804] erofs: fix broken illustration in documentation Illustration was broken after ReST conversion by accident. (checked by 'make SPHINXDIRS="filesystems" htmldocs') Link: https://lore.kernel.org/r/20210510162506.28637-1-xiang@kernel.org Fixes: e66d8631ddb3 ("docs: filesystems: convert erofs.txt to ReST") Reviewed-by: Chao Yu Cc: Mauro Carvalho Chehab Signed-off-by: Gao Xiang --- Documentation/filesystems/erofs.rst | 119 ++++++++++++++-------------- 1 file changed, 59 insertions(+), 60 deletions(-) diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index bf145171c2bf8..869b183ff2158 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -113,31 +113,31 @@ may not. All metadatas can be now observed in two different spaces (views): :: - |-> aligned with 8B - |-> followed closely - + meta_blkaddr blocks |-> another slot - _____________________________________________________________________ - | ... | inode | xattrs | extents | data inline | ... | inode ... - |________|_______|(optional)|(optional)|__(optional)_|_____|__________ - |-> aligned with the inode slot size - . . - . . - . . - . . - . . - . . - .____________________________________________________|-> aligned with 4B - | xattr_ibody_header | shared xattrs | inline xattrs | - |____________________|_______________|_______________| - |-> 12 bytes <-|->x * 4 bytes<-| . - . . . - . . . - . . . - ._______________________________.______________________. - | id | id | id | id | ... | id | ent | ... | ent| ... | - |____|____|____|____|______|____|_____|_____|____|_____| - |-> aligned with 4B - |-> aligned with 4B + |-> aligned with 8B + |-> followed closely + + meta_blkaddr blocks |-> another slot + _____________________________________________________________________ + | ... | inode | xattrs | extents | data inline | ... | inode ... + |________|_______|(optional)|(optional)|__(optional)_|_____|__________ + |-> aligned with the inode slot size + . . + . . + . . + . . + . . + . . + .____________________________________________________|-> aligned with 4B + | xattr_ibody_header | shared xattrs | inline xattrs | + |____________________|_______________|_______________| + |-> 12 bytes <-|->x * 4 bytes<-| . + . . . + . . . + . . . + ._______________________________.______________________. + | id | id | id | id | ... | id | ent | ... | ent| ... | + |____|____|____|____|______|____|_____|_____|____|_____| + |-> aligned with 4B + |-> aligned with 4B Inode could be 32 or 64 bytes, which can be distinguished from a common field which all inode versions have -- i_format:: @@ -175,13 +175,13 @@ may not. All metadatas can be now observed in two different spaces (views): Each share xattr can also be directly found by the following formula: xattr offset = xattr_blkaddr * block_size + 4 * xattr_id - :: +:: - |-> aligned by 4 bytes - + xattr_blkaddr blocks |-> aligned with 4 bytes - _________________________________________________________________________ - | ... | xattr_entry | xattr data | ... | xattr_entry | xattr data ... - |________|_____________|_____________|_____|______________|_______________ + |-> aligned by 4 bytes + + xattr_blkaddr blocks |-> aligned with 4 bytes + _________________________________________________________________________ + | ... | xattr_entry | xattr data | ... | xattr_entry | xattr data ... + |________|_____________|_____________|_____|______________|_______________ Directories ----------- @@ -193,19 +193,18 @@ algorithm (could refer to the related source code). :: - ___________________________ - / | - / ______________|________________ - / / | nameoff1 | nameoffN-1 - ____________.______________._______________v________________v__________ - | dirent | dirent | ... | dirent | filename | filename | ... | filename | - |___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____| - \ ^ - \ | * could have - \ | trailing '\0' - \________________________| nameoff0 - - Directory block + ___________________________ + / | + / ______________|________________ + / / | nameoff1 | nameoffN-1 + ____________.______________._______________v________________v__________ + | dirent | dirent | ... | dirent | filename | filename | ... | filename | + |___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____| + \ ^ + \ | * could have + \ | trailing '\0' + \________________________| nameoff0 + Directory block Note that apart from the offset of the first filename, nameoff0 also indicates the total number of directory entries in this block since it is no need to @@ -216,22 +215,22 @@ Compression Currently, EROFS supports 4KB fixed-sized output transparent file compression, as illustrated below:: - |---- Variant-Length Extent ----|-------- VLE --------|----- VLE ----- - clusterofs clusterofs clusterofs - | | | logical data - _________v_______________________________v_____________________v_______________ - ... | . | | . | | . | ... - ____|____.________|_____________|________.____|_____________|__.__________|____ - |-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-| - size size size size size - . . . . - . . . . - . . . . - _______._____________._____________._____________._____________________ - ... | | | | ... physical data - _______|_____________|_____________|_____________|_____________________ - |-> cluster <-|-> cluster <-|-> cluster <-| - size size size + |<- variable-sized extent ->|<- VLE ->| + clusterofs clusterofs clusterofs + | | | + _________v_________________________________v_______________________v________ + ... | . | | . | | . ... + ____|____._________|______________|________.___ _|______________|__.________ + |-> lcluster <-|-> lcluster <-|-> lcluster <-|-> lcluster <-| + size size size size . . + . . . . + . . . . + . . . . + _______.______________.______________.______________._________________ + ... | | | | ... + _______|______________|______________|______________|_________________ + |-> pcluster <-|-> pcluster <-|-> pcluster <-| + size size size Currently each on-disk physical cluster can contain 4KB (un)compressed data at most. For each logical cluster, there is a corresponding on-disk index to -- GitLab From 46f2e04484aee056c97f79162da83ac7d2d621bb Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Tue, 11 May 2021 16:44:14 +0800 Subject: [PATCH 0327/3804] erofs: update documentation about data compression Add more description about (NON)HEAD lclusters, and the new big pcluster feature. Link: https://lore.kernel.org/r/20210511084414.21305-1-xiang@kernel.org Reviewed-by: Chao Yu Signed-off-by: Gao Xiang --- Documentation/filesystems/erofs.rst | 68 +++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 19 deletions(-) diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index 869b183ff2158..832839fcf4c3b 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -50,8 +50,8 @@ Here is the main features of EROFS: - Support POSIX.1e ACLs by using xattrs; - - Support transparent file compression as an option: - LZ4 algorithm with 4 KB fixed-sized output compression for high performance. + - Support transparent data compression as an option: + LZ4 algorithm with the fixed-sized output compression for high performance. The following git tree provides the file system user-space tools under development (ex, formatting tool mkfs.erofs): @@ -210,10 +210,21 @@ Note that apart from the offset of the first filename, nameoff0 also indicates the total number of directory entries in this block since it is no need to introduce another on-disk field at all. -Compression ------------ -Currently, EROFS supports 4KB fixed-sized output transparent file compression, -as illustrated below:: +Data compression +---------------- +EROFS implements LZ4 fixed-sized output compression which generates fixed-sized +compressed data blocks from variable-sized input in contrast to other existing +fixed-sized input solutions. Relatively higher compression ratios can be gotten +by using fixed-sized output compression since nowadays popular data compression +algorithms are mostly LZ77-based and such fixed-sized output approach can be +benefited from the historical dictionary (aka. sliding window). + +In details, original (uncompressed) data is turned into several variable-sized +extents and in the meanwhile, compressed into physical clusters (pclusters). +In order to record each variable-sized extent, logical clusters (lclusters) are +introduced as the basic unit of compress indexes to indicate whether a new +extent is generated within the range (HEAD) or not (NONHEAD). Lclusters are now +fixed in block size, as illustrated below:: |<- variable-sized extent ->|<- VLE ->| clusterofs clusterofs clusterofs @@ -222,18 +233,37 @@ as illustrated below:: ... | . | | . | | . ... ____|____._________|______________|________.___ _|______________|__.________ |-> lcluster <-|-> lcluster <-|-> lcluster <-|-> lcluster <-| - size size size size . . - . . . . - . . . . - . . . . - _______.______________.______________.______________._________________ + (HEAD) (NONHEAD) (HEAD) (NONHEAD) . + . CBLKCNT . . + . . . + . . . + _______._____________________________.______________._________________ ... | | | | ... _______|______________|______________|______________|_________________ - |-> pcluster <-|-> pcluster <-|-> pcluster <-| - size size size - -Currently each on-disk physical cluster can contain 4KB (un)compressed data -at most. For each logical cluster, there is a corresponding on-disk index to -describe its cluster type, physical cluster address, etc. - -See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details. + |-> big pcluster <-|-> pcluster <-| + +A physical cluster can be seen as a container of physical compressed blocks +which contains compressed data. Previously, only lcluster-sized (4KB) pclusters +were supported. After big pcluster feature is introduced (available since +Linux v5.13), pcluster can be a multiple of lcluster size. + +For each HEAD lcluster, clusterofs is recorded to indicate where a new extent +starts and blkaddr is used to seek the compressed data. For each NONHEAD +lcluster, delta0 and delta1 are available instead of blkaddr to indicate the +distance to its HEAD lcluster and the next HEAD lcluster. A PLAIN lcluster is +also a HEAD lcluster except that its data is uncompressed. See the comments +around "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details. + +If big pcluster is enabled, pcluster size in lclusters needs to be recorded as +well. Let the delta0 of the first NONHEAD lcluster store the compressed block +count with a special flag as a new called CBLKCNT NONHEAD lcluster. It's easy +to understand its delta0 is constantly 1, as illustrated below:: + + __________________________________________________________ + | HEAD | NONHEAD | NONHEAD | ... | NONHEAD | HEAD | HEAD | + |__:___|_(CBLKCNT)_|_________|_____|_________|__:___|____:_| + |<----- a big pcluster (with CBLKCNT) ------>|<-- -->| + a lcluster-sized pcluster (without CBLKCNT) ^ + +If another HEAD follows a HEAD lcluster, there is no room to record CBLKCNT, +but it's easy to know the size of such pcluster is 1 lcluster as well. -- GitLab From 35f3f8504c3b60a1ae5576e178b27fc0ddd6157d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 16:12:42 +0300 Subject: [PATCH 0328/3804] spi: Switch to signed types for *_native_cs SPI controller fields While fixing undefined behaviour the commit f60d7270c8a3 ("spi: Avoid undefined behaviour when counting unused native CSs") missed the case when all CSs are GPIOs and thus unused_native_cs will be evaluated to -1 in unsigned representation. This will falsely trigger a condition in the spi_get_gpio_descs(). Switch to signed types for *_native_cs SPI controller fields to fix above. Fixes: f60d7270c8a3 ("spi: Avoid undefined behaviour when counting unused native CSs") Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510131242.49455-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 360a3bc767ca0..74239d65c7fd1 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -644,8 +644,8 @@ struct spi_controller { int *cs_gpios; struct gpio_desc **cs_gpiods; bool use_gpio_descriptors; - u8 unused_native_cs; - u8 max_native_cs; + s8 unused_native_cs; + s8 max_native_cs; /* statistics */ struct spi_statistics statistics; -- GitLab From d019f38a1af3c6015cde6a47951a3ec43beeed80 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Tue, 11 May 2021 11:53:18 +0800 Subject: [PATCH 0329/3804] regulator: uniphier: Add missing MODULE_DEVICE_TABLE This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this driver when it is built as an external module. Reported-by: Hulk Robot Signed-off-by: Zou Wei Link: https://lore.kernel.org/r/1620705198-104566-1-git-send-email-zou_wei@huawei.com Signed-off-by: Mark Brown --- drivers/regulator/uniphier-regulator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/regulator/uniphier-regulator.c b/drivers/regulator/uniphier-regulator.c index 2e02e26b516c4..e75b0973e3256 100644 --- a/drivers/regulator/uniphier-regulator.c +++ b/drivers/regulator/uniphier-regulator.c @@ -201,6 +201,7 @@ static const struct of_device_id uniphier_regulator_match[] = { }, { /* Sentinel */ }, }; +MODULE_DEVICE_TABLE(of, uniphier_regulator_match); static struct platform_driver uniphier_regulator_driver = { .probe = uniphier_regulator_probe, -- GitLab From 86b8bff7e3ac6775113639d88db7448a8b47f0c1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 16:11:20 +0300 Subject: [PATCH 0330/3804] spi: Convert to use predefined time multipliers We have a lot of hard coded values in nanoseconds or other units. Use predefined constants to make it more clear. While at it, add or amend comments in the corresponding functions. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510131120.49253-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index f9885c0965637..407420977a739 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1118,10 +1118,20 @@ static int spi_transfer_wait(struct spi_controller *ctlr, if (!speed_hz) speed_hz = 100000; - ms = 8LL * 1000LL * xfer->len; + /* + * For each byte we wait for 8 cycles of the SPI clock. + * Since speed is defined in Hz and we want milliseconds, + * use respective multiplier, but before the division, + * otherwise we may get 0 for short transfers. + */ + ms = 8LL * MSEC_PER_SEC * xfer->len; do_div(ms, speed_hz); - ms += ms + 200; /* some tolerance */ + /* + * Increase it twice and add 200 ms tolerance, use + * predefined maximum in case of overflow. + */ + ms += ms + 200; if (ms > UINT_MAX) ms = UINT_MAX; @@ -1144,10 +1154,10 @@ static void _spi_transfer_delay_ns(u32 ns) { if (!ns) return; - if (ns <= 1000) { + if (ns <= NSEC_PER_USEC) { ndelay(ns); } else { - u32 us = DIV_ROUND_UP(ns, 1000); + u32 us = DIV_ROUND_UP(ns, NSEC_PER_USEC); if (us <= 10) udelay(us); @@ -1167,21 +1177,25 @@ int spi_delay_to_ns(struct spi_delay *_delay, struct spi_transfer *xfer) switch (unit) { case SPI_DELAY_UNIT_USECS: - delay *= 1000; + delay *= NSEC_PER_USEC; break; - case SPI_DELAY_UNIT_NSECS: /* nothing to do here */ + case SPI_DELAY_UNIT_NSECS: + /* Nothing to do here */ break; case SPI_DELAY_UNIT_SCK: /* clock cycles need to be obtained from spi_transfer */ if (!xfer) return -EINVAL; - /* if there is no effective speed know, then approximate - * by underestimating with half the requested hz + /* + * If there is unknown effective speed, approximate it + * by underestimating with half of the requested hz. */ hz = xfer->effective_speed_hz ?: xfer->speed_hz / 2; if (!hz) return -EINVAL; - delay *= DIV_ROUND_UP(1000000000, hz); + + /* Convert delay to nanoseconds */ + delay *= DIV_ROUND_UP(NSEC_PER_SEC, hz); break; default: return -EINVAL; @@ -1213,6 +1227,7 @@ EXPORT_SYMBOL_GPL(spi_delay_exec); static void _spi_transfer_cs_change_delay(struct spi_message *msg, struct spi_transfer *xfer) { + u32 default_delay_ns = 10 * NSEC_PER_USEC; u32 delay = xfer->cs_change_delay.value; u32 unit = xfer->cs_change_delay.unit; int ret; @@ -1220,16 +1235,16 @@ static void _spi_transfer_cs_change_delay(struct spi_message *msg, /* return early on "fast" mode - for everything but USECS */ if (!delay) { if (unit == SPI_DELAY_UNIT_USECS) - _spi_transfer_delay_ns(10000); + _spi_transfer_delay_ns(default_delay_ns); return; } ret = spi_delay_exec(&xfer->cs_change_delay, xfer); if (ret) { dev_err_once(&msg->spi->dev, - "Use of unsupported delay unit %i, using default of 10us\n", - unit); - _spi_transfer_delay_ns(10000); + "Use of unsupported delay unit %i, using default of %luus\n", + unit, default_delay_ns / NSEC_PER_USEC); + _spi_transfer_delay_ns(default_delay_ns); } } -- GitLab From 532259bfd1c12d561215c32b94cd9bb7c997bc6f Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Tue, 11 May 2021 15:08:42 +0800 Subject: [PATCH 0331/3804] spi: altera: Remove redundant dev_err call in dfl_spi_altera_probe() There is a error message within devm_ioremap_resource already, so remove the dev_err call to avoid redundant error message. Reported-by: Hulk Robot Signed-off-by: Zou Wei Link: https://lore.kernel.org/r/1620716922-108572-1-git-send-email-zou_wei@huawei.com Signed-off-by: Mark Brown --- drivers/spi/spi-altera-dfl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/spi/spi-altera-dfl.c b/drivers/spi/spi-altera-dfl.c index 3e32e4fe58950..39a3e1a032e04 100644 --- a/drivers/spi/spi-altera-dfl.c +++ b/drivers/spi/spi-altera-dfl.c @@ -148,10 +148,8 @@ static int dfl_spi_altera_probe(struct dfl_device *dfl_dev) base = devm_ioremap_resource(dev, &dfl_dev->mmio_res); - if (IS_ERR(base)) { - dev_err(dev, "%s get mem resource fail!\n", __func__); + if (IS_ERR(base)) return PTR_ERR(base); - } config_spi_master(base, master); dev_dbg(dev, "%s cs %u bpm 0x%x mode 0x%x\n", __func__, -- GitLab From 91e02557f377b6837d4f82b14229d92cae231001 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 11 May 2021 11:05:00 +0200 Subject: [PATCH 0332/3804] ALSA: usb-audio: Fix potential out-of-bounce access in MIDI EP parser The recently introduced MIDI endpoint parser code has an access to the field without the size validation, hence it might lead to out-of-bounce access. Add the sanity checks for the descriptor sizes. Fixes: eb596e0fd13c ("ALSA: usb-audio: generate midi streaming substream names from jack names") Link: https://lore.kernel.org/r/20210511090500.2637-1-tiwai@suse.de Signed-off-by: Takashi Iwai --- sound/usb/midi.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 649eb8d1ab7dd..2c01649c70f61 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1750,7 +1750,7 @@ static struct usb_midi_in_jack_descriptor *find_usb_in_jack_descriptor( struct usb_midi_in_jack_descriptor *injd = (struct usb_midi_in_jack_descriptor *)extra; - if (injd->bLength > 4 && + if (injd->bLength >= sizeof(*injd) && injd->bDescriptorType == USB_DT_CS_INTERFACE && injd->bDescriptorSubtype == UAC_MIDI_IN_JACK && injd->bJackID == jack_id) @@ -1773,7 +1773,7 @@ static struct usb_midi_out_jack_descriptor *find_usb_out_jack_descriptor( struct usb_midi_out_jack_descriptor *outjd = (struct usb_midi_out_jack_descriptor *)extra; - if (outjd->bLength > 4 && + if (outjd->bLength >= sizeof(*outjd) && outjd->bDescriptorType == USB_DT_CS_INTERFACE && outjd->bDescriptorSubtype == UAC_MIDI_OUT_JACK && outjd->bJackID == jack_id) @@ -1820,7 +1820,8 @@ static void snd_usbmidi_init_substream(struct snd_usb_midi *umidi, outjd = find_usb_out_jack_descriptor(hostif, jack_id); if (outjd) { sz = USB_DT_MIDI_OUT_SIZE(outjd->bNrInputPins); - iJack = *(((uint8_t *) outjd) + sz - sizeof(uint8_t)); + if (outjd->bLength >= sz) + iJack = *(((uint8_t *) outjd) + sz - sizeof(uint8_t)); } } else { /* and out jacks connect to ins */ -- GitLab From 4b81ccebaeee885ab1aa1438133f2991e3a2b6ea Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 27 Apr 2021 10:12:12 -0300 Subject: [PATCH 0333/3804] bpf, ringbuf: Deny reserve of buffers larger than ringbuf A BPF program might try to reserve a buffer larger than the ringbuf size. If the consumer pointer is way ahead of the producer, that would be successfully reserved, allowing the BPF program to read or write out of the ringbuf allocated area. Reported-by: Ryota Shiga (Flatt Security) Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Alexei Starovoitov --- kernel/bpf/ringbuf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index f25b719ac7868..b86d80c9cd59b 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -315,6 +315,9 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size) return NULL; len = round_up(size + BPF_RINGBUF_HDR_SZ, 8); + if (len > rb->mask + 1) + return NULL; + cons_pos = smp_load_acquire(&rb->consumer_pos); if (in_nmi()) { -- GitLab From 04ea3086c4d73da7009de1e84962a904139af219 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 4 May 2021 16:38:00 -0700 Subject: [PATCH 0334/3804] bpf: Prevent writable memory-mapping of read-only ringbuf pages Only the very first page of BPF ringbuf that contains consumer position counter is supposed to be mapped as writeable by user-space. Producer position is read-only and can be modified only by the kernel code. BPF ringbuf data pages are read-only as well and are not meant to be modified by user-code to maintain integrity of per-record headers. This patch allows to map only consumer position page as writeable and everything else is restricted to be read-only. remap_vmalloc_range() internally adds VM_DONTEXPAND, so all the established memory mappings can't be extended, which prevents any future violations through mremap()'ing. Fixes: 457f44363a88 ("bpf: Implement BPF ring buffer and verifier support for it") Reported-by: Ryota Shiga (Flatt Security) Reported-by: Thadeu Lima de Souza Cascardo Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov --- kernel/bpf/ringbuf.c | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index b86d80c9cd59b..84b3b35fc0d05 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -221,25 +221,20 @@ static int ringbuf_map_get_next_key(struct bpf_map *map, void *key, return -ENOTSUPP; } -static size_t bpf_ringbuf_mmap_page_cnt(const struct bpf_ringbuf *rb) -{ - size_t data_pages = (rb->mask + 1) >> PAGE_SHIFT; - - /* consumer page + producer page + 2 x data pages */ - return RINGBUF_POS_PAGES + 2 * data_pages; -} - static int ringbuf_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) { struct bpf_ringbuf_map *rb_map; - size_t mmap_sz; rb_map = container_of(map, struct bpf_ringbuf_map, map); - mmap_sz = bpf_ringbuf_mmap_page_cnt(rb_map->rb) << PAGE_SHIFT; - - if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) > mmap_sz) - return -EINVAL; + if (vma->vm_flags & VM_WRITE) { + /* allow writable mapping for the consumer_pos only */ + if (vma->vm_pgoff != 0 || vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EPERM; + } else { + vma->vm_flags &= ~VM_MAYWRITE; + } + /* remap_vmalloc_range() checks size and offset constraints */ return remap_vmalloc_range(vma, rb_map->rb, vma->vm_pgoff + RINGBUF_PGOFF); } -- GitLab From ff67dbd554b2aaa22be933eced32610ff90209dd Mon Sep 17 00:00:00 2001 From: Qiu Wenbo Date: Wed, 28 Apr 2021 13:06:36 +0800 Subject: [PATCH 0335/3804] platform/x86: ideapad-laptop: fix a NULL pointer dereference The third parameter of dytc_cql_command should not be NULL since it will be dereferenced immediately. Fixes: ff36b0d953dc4 ("platform/x86: ideapad-laptop: rework and create new ACPI helpers") Signed-off-by: Qiu Wenbo Acked-by: Ike Panhc Link: https://lore.kernel.org/r/20210428050636.8003-1-qiuwenbo@kylinos.com.cn Signed-off-by: Hans de Goede --- drivers/platform/x86/ideapad-laptop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 6cb5ad4be231d..8f871151f0ccb 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -809,6 +809,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof, { struct ideapad_dytc_priv *dytc = container_of(pprof, struct ideapad_dytc_priv, pprof); struct ideapad_private *priv = dytc->priv; + unsigned long output; int err; err = mutex_lock_interruptible(&dytc->mutex); @@ -829,7 +830,7 @@ static int dytc_profile_set(struct platform_profile_handler *pprof, /* Determine if we are in CQL mode. This alters the commands we do */ err = dytc_cql_command(priv, DYTC_SET_COMMAND(DYTC_FUNCTION_MMC, perfmode, 1), - NULL); + &output); if (err) goto unlock; } -- GitLab From b09aaa3f2c0edeeed670cd29961a0e35bddc78cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Barnab=C3=A1s=20P=C5=91cze?= Date: Fri, 7 May 2021 23:53:44 +0000 Subject: [PATCH 0336/3804] platform/x86: ideapad-laptop: fix method name typo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "smbc" should be "sbmc". `eval_smbc()` incorrectly called the SMBC ACPI method instead of SBMC. This resulted in partial loss of functionality. Rectify that by calling the correct ACPI method (SBMC), and also rename methods and constants. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=212985 Fixes: 0b765671cb80 ("platform/x86: ideapad-laptop: group and separate (un)related constants into enums") Fixes: ff36b0d953dc ("platform/x86: ideapad-laptop: rework and create new ACPI helpers") Cc: stable@vger.kernel.org # 5.12 Signed-off-by: Barnabás Pőcze Link: https://lore.kernel.org/r/20210507235333.286505-1-pobrn@protonmail.com Signed-off-by: Hans de Goede --- drivers/platform/x86/ideapad-laptop.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 8f871151f0ccb..3878172909219 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -57,8 +57,8 @@ enum { }; enum { - SMBC_CONSERVATION_ON = 3, - SMBC_CONSERVATION_OFF = 5, + SBMC_CONSERVATION_ON = 3, + SBMC_CONSERVATION_OFF = 5, }; enum { @@ -182,9 +182,9 @@ static int eval_gbmd(acpi_handle handle, unsigned long *res) return eval_int(handle, "GBMD", res); } -static int exec_smbc(acpi_handle handle, unsigned long arg) +static int exec_sbmc(acpi_handle handle, unsigned long arg) { - return exec_simple_method(handle, "SMBC", arg); + return exec_simple_method(handle, "SBMC", arg); } static int eval_hals(acpi_handle handle, unsigned long *res) @@ -477,7 +477,7 @@ static ssize_t conservation_mode_store(struct device *dev, if (err) return err; - err = exec_smbc(priv->adev->handle, state ? SMBC_CONSERVATION_ON : SMBC_CONSERVATION_OFF); + err = exec_sbmc(priv->adev->handle, state ? SBMC_CONSERVATION_ON : SBMC_CONSERVATION_OFF); if (err) return err; -- GitLab From 79d341e26ebcdbc622348aaaab6f8f89b6fdb25f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Fri, 30 Apr 2021 14:07:35 +0800 Subject: [PATCH 0337/3804] platform/x86: hp_accel: Avoid invoking _INI to speed up resume MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hp_accel can take almost two seconds to resume on some HP laptops. The bottleneck is on evaluating _INI, which is only needed to run once. Resolve the issue by only invoking _INI when it's necessary. Namely, on probe and on hibernation restore. Signed-off-by: Kai-Heng Feng Acked-by: Éric Piel Link: https://lore.kernel.org/r/20210430060736.590321-1-kai.heng.feng@canonical.com Signed-off-by: Hans de Goede --- drivers/misc/lis3lv02d/lis3lv02d.h | 1 + drivers/platform/x86/hp_accel.c | 22 +++++++++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/misc/lis3lv02d/lis3lv02d.h b/drivers/misc/lis3lv02d/lis3lv02d.h index c394c0b08519a..7ac788fae1b86 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.h +++ b/drivers/misc/lis3lv02d/lis3lv02d.h @@ -271,6 +271,7 @@ struct lis3lv02d { int regs_size; u8 *reg_cache; bool regs_stored; + bool init_required; u8 odr_mask; /* ODR bit mask */ u8 whoami; /* indicates measurement precision */ s16 (*read_data) (struct lis3lv02d *lis3, int reg); diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c index 799cbe2ffcf36..8c0867bda8280 100644 --- a/drivers/platform/x86/hp_accel.c +++ b/drivers/platform/x86/hp_accel.c @@ -88,6 +88,9 @@ MODULE_DEVICE_TABLE(acpi, lis3lv02d_device_ids); static int lis3lv02d_acpi_init(struct lis3lv02d *lis3) { struct acpi_device *dev = lis3->bus_priv; + if (!lis3->init_required) + return 0; + if (acpi_evaluate_object(dev->handle, METHOD_NAME__INI, NULL, NULL) != AE_OK) return -EINVAL; @@ -356,6 +359,7 @@ static int lis3lv02d_add(struct acpi_device *device) } /* call the core layer do its init */ + lis3_dev.init_required = true; ret = lis3lv02d_init_device(&lis3_dev); if (ret) return ret; @@ -403,11 +407,27 @@ static int lis3lv02d_suspend(struct device *dev) static int lis3lv02d_resume(struct device *dev) { + lis3_dev.init_required = false; + lis3lv02d_poweron(&lis3_dev); + return 0; +} + +static int lis3lv02d_restore(struct device *dev) +{ + lis3_dev.init_required = true; lis3lv02d_poweron(&lis3_dev); return 0; } -static SIMPLE_DEV_PM_OPS(hp_accel_pm, lis3lv02d_suspend, lis3lv02d_resume); +static const struct dev_pm_ops hp_accel_pm = { + .suspend = lis3lv02d_suspend, + .resume = lis3lv02d_resume, + .freeze = lis3lv02d_suspend, + .thaw = lis3lv02d_resume, + .poweroff = lis3lv02d_suspend, + .restore = lis3lv02d_restore, +}; + #define HP_ACCEL_PM (&hp_accel_pm) #else #define HP_ACCEL_PM NULL -- GitLab From a5c936add6a23c15c6ae538ab7a12f80751fdf0f Mon Sep 17 00:00:00 2001 From: Kai-Heng Feng Date: Wed, 21 Apr 2021 13:20:31 +0800 Subject: [PATCH 0338/3804] drm/i915/dp: Use slow and wide link training for everything MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Screen flickers on Innolux eDP 1.3 panel when clock rate 540000 is in use. According to the panel vendor, though clock rate 540000 is advertised, but the max clock rate it really supports is 270000. Ville Syrjälä mentioned that fast and narrow also breaks some eDP 1.4 panel, so use slow and wide training for all panels to resolve the issue. User also confirmed that the new strategy doesn't introduce any regression on XPS 9380. v2: - Use slow and wide for everything. Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3384 References: https://gitlab.freedesktop.org/drm/intel/-/issues/272 Signed-off-by: Kai-Heng Feng Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210421052054.1434718-1-kai.heng.feng@canonical.com (cherry picked from commit acca7762eb71bc05a8f28d29320d193150051f79) Fixes: 2bbd6dba84d4 ("drm/i915: Try to use fast+narrow link on eDP again and fall back to the old max strategy on failure") Cc: # v5.12+ Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp.c | 59 +++---------------------- 1 file changed, 5 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 6a2dee8cef1f1..1e026177ed1ba 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -1095,44 +1095,6 @@ intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, return -EINVAL; } -/* Optimize link config in order: max bpp, min lanes, min clock */ -static int -intel_dp_compute_link_config_fast(struct intel_dp *intel_dp, - struct intel_crtc_state *pipe_config, - const struct link_config_limits *limits) -{ - const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode; - int bpp, clock, lane_count; - int mode_rate, link_clock, link_avail; - - for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) { - int output_bpp = intel_dp_output_bpp(pipe_config->output_format, bpp); - - mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, - output_bpp); - - for (lane_count = limits->min_lane_count; - lane_count <= limits->max_lane_count; - lane_count <<= 1) { - for (clock = limits->min_clock; clock <= limits->max_clock; clock++) { - link_clock = intel_dp->common_rates[clock]; - link_avail = intel_dp_max_data_rate(link_clock, - lane_count); - - if (mode_rate <= link_avail) { - pipe_config->lane_count = lane_count; - pipe_config->pipe_bpp = bpp; - pipe_config->port_clock = link_clock; - - return 0; - } - } - } - } - - return -EINVAL; -} - static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc) { int i, num_bpc; @@ -1382,22 +1344,11 @@ intel_dp_compute_link_config(struct intel_encoder *encoder, intel_dp_can_bigjoiner(intel_dp)) pipe_config->bigjoiner = true; - if (intel_dp_is_edp(intel_dp)) - /* - * Optimize for fast and narrow. eDP 1.3 section 3.3 and eDP 1.4 - * section A.1: "It is recommended that the minimum number of - * lanes be used, using the minimum link rate allowed for that - * lane configuration." - * - * Note that we fall back to the max clock and lane count for eDP - * panels that fail with the fast optimal settings (see - * intel_dp->use_max_params), in which case the fast vs. wide - * choice doesn't matter. - */ - ret = intel_dp_compute_link_config_fast(intel_dp, pipe_config, &limits); - else - /* Optimize for slow and wide. */ - ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits); + /* + * Optimize for slow and wide for everything, because there are some + * eDP 1.3 and 1.4 panels don't work well with fast and narrow. + */ + ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits); /* enable compression if the mode doesn't fit available BW */ drm_dbg_kms(&i915->drm, "Force DSC en = %d\n", intel_dp->force_dsc_en); -- GitLab From 9b8a233bc294dd71d3c7d30692a78ab32f246a0f Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Fri, 30 Apr 2021 21:30:55 +0530 Subject: [PATCH 0339/3804] btrfs: handle transaction start error in btrfs_fileattr_set Add error handling in btrfs_fileattr_set in case of an error while starting a transaction. This fixes btrfs/232 which otherwise used to fail with below signature on Power. btrfs/232 [ 1119.474650] run fstests btrfs/232 at 2021-04-21 02:21:22 <...> [ 1366.638585] BUG: Unable to handle kernel data access on read at 0xffffffffffffff86 [ 1366.638768] Faulting instruction address: 0xc0000000009a5c88 cpu 0x0: Vector: 380 (Data SLB Access) at [c000000014f177b0] pc: c0000000009a5c88: btrfs_update_root_times+0x58/0xc0 lr: c0000000009a5c84: btrfs_update_root_times+0x54/0xc0 <...> pid = 24881, comm = fsstress btrfs_update_inode+0xa0/0x140 btrfs_fileattr_set+0x5d0/0x6f0 vfs_fileattr_set+0x2a8/0x390 do_vfs_ioctl+0x1290/0x1ac0 sys_ioctl+0x6c/0x120 system_call_exception+0x3d4/0x410 system_call_common+0xec/0x278 Fixes: 97fc29775487 ("btrfs: convert to fileattr") Signed-off-by: Ritesh Harjani Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/ioctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ee1dbabb5d3c4..98ecb70466bf3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -259,6 +259,8 @@ int btrfs_fileattr_set(struct user_namespace *mnt_userns, if (!fa->flags_valid) { /* 1 item for the inode */ trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); goto update_flags; } -- GitLab From f2be77fee648ddd6d0d259d3527344ba0120e314 Mon Sep 17 00:00:00 2001 From: Elia Devito Date: Tue, 11 May 2021 14:46:49 +0200 Subject: [PATCH 0340/3804] ALSA: hda/realtek: Add fixup for HP Spectre x360 15-df0xxx Fixup to enable all 4 speaker on HP Spectre x360 15-df0xxx and probably on similar models. 0x14 pin config override is required to enable all speakers and alc285-speaker2-to-dac1 fixup to enable volume adjustment. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=189331 Signed-off-by: Elia Devito Cc: Link: https://lore.kernel.org/r/20210511124651.4802-1-eliadevito@gmail.com Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index b4b71609dff11..3e269de840799 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6542,6 +6542,7 @@ enum { ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST, ALC295_FIXUP_ASUS_DACS, ALC295_FIXUP_HP_OMEN, + ALC285_FIXUP_HP_SPECTRE_X360, }; static const struct hda_fixup alc269_fixups[] = { @@ -8099,6 +8100,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HP_LINE1_MIC1_LED, }, + [ALC285_FIXUP_HP_SPECTRE_X360] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x14, 0x90170110 }, /* enable top speaker */ + {} + }, + .chained = true, + .chain_id = ALC285_FIXUP_SPEAKER2_TO_DAC1, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -8259,6 +8269,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), + SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED), @@ -8665,6 +8676,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC274_FIXUP_HP_MIC, .name = "alc274-hp-mic-detect"}, {.id = ALC245_FIXUP_HP_X360_AMP, .name = "alc245-hp-x360-amp"}, {.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"}, + {.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"}, {} }; #define ALC225_STANDARD_PINS \ -- GitLab From efed9a3337e341bd0989161b97453b52567bc59d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 10 May 2021 17:05:35 -0700 Subject: [PATCH 0341/3804] kyber: fix out of bounds access when preempted __blk_mq_sched_bio_merge() gets the ctx and hctx for the current CPU and passes the hctx to ->bio_merge(). kyber_bio_merge() then gets the ctx for the current CPU again and uses that to get the corresponding Kyber context in the passed hctx. However, the thread may be preempted between the two calls to blk_mq_get_ctx(), and the ctx returned the second time may no longer correspond to the passed hctx. This "works" accidentally most of the time, but it can cause us to read garbage if the second ctx came from an hctx with more ctx's than the first one (i.e., if ctx->index_hw[hctx->type] > hctx->nr_ctx). This manifested as this UBSAN array index out of bounds error reported by Jakub: UBSAN: array-index-out-of-bounds in ../kernel/locking/qspinlock.c:130:9 index 13106 is out of range for type 'long unsigned int [128]' Call Trace: dump_stack+0xa4/0xe5 ubsan_epilogue+0x5/0x40 __ubsan_handle_out_of_bounds.cold.13+0x2a/0x34 queued_spin_lock_slowpath+0x476/0x480 do_raw_spin_lock+0x1c2/0x1d0 kyber_bio_merge+0x112/0x180 blk_mq_submit_bio+0x1f5/0x1100 submit_bio_noacct+0x7b0/0x870 submit_bio+0xc2/0x3a0 btrfs_map_bio+0x4f0/0x9d0 btrfs_submit_data_bio+0x24e/0x310 submit_one_bio+0x7f/0xb0 submit_extent_page+0xc4/0x440 __extent_writepage_io+0x2b8/0x5e0 __extent_writepage+0x28d/0x6e0 extent_write_cache_pages+0x4d7/0x7a0 extent_writepages+0xa2/0x110 do_writepages+0x8f/0x180 __writeback_single_inode+0x99/0x7f0 writeback_sb_inodes+0x34e/0x790 __writeback_inodes_wb+0x9e/0x120 wb_writeback+0x4d2/0x660 wb_workfn+0x64d/0xa10 process_one_work+0x53a/0xa80 worker_thread+0x69/0x5b0 kthread+0x20b/0x240 ret_from_fork+0x1f/0x30 Only Kyber uses the hctx, so fix it by passing the request_queue to ->bio_merge() instead. BFQ and mq-deadline just use that, and Kyber can map the queues itself to avoid the mismatch. Fixes: a6088845c2bf ("block: kyber: make kyber more friendly with merging") Reported-by: Jakub Kicinski Signed-off-by: Omar Sandoval Link: https://lore.kernel.org/r/c7598605401a48d5cfeadebb678abd10af22b83f.1620691329.git.osandov@fb.com Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 3 +-- block/blk-mq-sched.c | 8 +++++--- block/kyber-iosched.c | 5 +++-- block/mq-deadline.c | 3 +-- include/linux/elevator.h | 2 +- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 0270cd7ca1658..59b2499d3f8be 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2263,10 +2263,9 @@ static void bfq_remove_request(struct request_queue *q, } -static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, +static bool bfq_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { - struct request_queue *q = hctx->queue; struct bfq_data *bfqd = q->elevator->elevator_data; struct request *free = NULL; /* diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 42a365b1b9c0e..996a4b2f73aa9 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -358,14 +358,16 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { struct elevator_queue *e = q->elevator; - struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); - struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); + struct blk_mq_ctx *ctx; + struct blk_mq_hw_ctx *hctx; bool ret = false; enum hctx_type type; if (e && e->type->ops.bio_merge) - return e->type->ops.bio_merge(hctx, bio, nr_segs); + return e->type->ops.bio_merge(q, bio, nr_segs); + ctx = blk_mq_get_ctx(q); + hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); type = hctx->type; if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) || list_empty_careful(&ctx->rq_lists[type])) diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index 8969e122f0811..81e3279ecd574 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -561,11 +561,12 @@ static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data) } } -static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, +static bool kyber_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { + struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); + struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx); struct kyber_hctx_data *khd = hctx->sched_data; - struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue); struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]]; unsigned int sched_domain = kyber_sched_domain(bio->bi_opf); struct list_head *rq_list = &kcq->rq_list[sched_domain]; diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 04aded71ead27..8eea2cbf2bf4a 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -461,10 +461,9 @@ static int dd_request_merge(struct request_queue *q, struct request **rq, return ELEVATOR_NO_MERGE; } -static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, +static bool dd_bio_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs) { - struct request_queue *q = hctx->queue; struct deadline_data *dd = q->elevator->elevator_data; struct request *free = NULL; bool ret; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 1fe8e105b83bf..dcb2f9022c1df 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -34,7 +34,7 @@ struct elevator_mq_ops { void (*depth_updated)(struct blk_mq_hw_ctx *); bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); - bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *, unsigned int); + bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int); int (*request_merge)(struct request_queue *q, struct request **, struct bio *); void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); void (*requests_merged)(struct request_queue *, struct request *, struct request *); -- GitLab From 0919a3acc0c87049a7d787c4b8b9e64bd7c59eb3 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 11 May 2021 10:17:07 +0900 Subject: [PATCH 0342/3804] ASoC: simple-card: add simple_parse_node() Original commit 59c35c44a9cf89a83a9 ("ASoC: simple-card: add simple_parse_node()") was reverted, and this is remake version. Parse dai/tdm/clk are common for both CPU/Codec node. This patch creates simple_parse_node() for it and share the code. Reported-by: "kernelci.org bot" Fixes: 25c4a9b614f101bb9f3 ("ASoC: simple-card: Fix breakage on kontron-sl28-var3-ads2") Fixes: 59c35c44a9cf89a83a9 ("ASoC: simple-card: add simple_parse_node()") Signed-off-by: Kuninori Morimoto Tested-by: Michael Walle Link: https://lore.kernel.org/r/87h7jaax2k.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card.c | 107 ++++++++++++++++---------------- 1 file changed, 53 insertions(+), 54 deletions(-) diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index a1373be4558f3..57ab89be1b4b7 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -93,12 +93,11 @@ static void simple_parse_convert(struct device *dev, } static void simple_parse_mclk_fs(struct device_node *top, - struct device_node *cpu, - struct device_node *codec, + struct device_node *np, struct simple_dai_props *props, char *prefix) { - struct device_node *node = of_get_parent(cpu); + struct device_node *node = of_get_parent(np); char prop[128]; snprintf(prop, sizeof(prop), "%smclk-fs", PREFIX); @@ -106,12 +105,50 @@ static void simple_parse_mclk_fs(struct device_node *top, snprintf(prop, sizeof(prop), "%smclk-fs", prefix); of_property_read_u32(node, prop, &props->mclk_fs); - of_property_read_u32(cpu, prop, &props->mclk_fs); - of_property_read_u32(codec, prop, &props->mclk_fs); + of_property_read_u32(np, prop, &props->mclk_fs); of_node_put(node); } +static int simple_parse_node(struct asoc_simple_priv *priv, + struct device_node *np, + struct link_info *li, + char *prefix, + int *cpu) +{ + struct device *dev = simple_priv_to_dev(priv); + struct device_node *top = dev->of_node; + struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link); + struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link); + struct snd_soc_dai_link_component *dlc; + struct asoc_simple_dai *dai; + int ret; + + if (cpu) { + dlc = asoc_link_to_cpu(dai_link, 0); + dai = simple_props_to_dai_cpu(dai_props, 0); + } else { + dlc = asoc_link_to_codec(dai_link, 0); + dai = simple_props_to_dai_codec(dai_props, 0); + } + + simple_parse_mclk_fs(top, np, dai_props, prefix); + + ret = asoc_simple_parse_dai(np, dlc, cpu); + if (ret) + return ret; + + ret = asoc_simple_parse_clk(dev, np, dai, dlc); + if (ret) + return ret; + + ret = asoc_simple_parse_tdm(np, dai); + if (ret) + return ret; + + return 0; +} + static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, struct device_node *np, struct device_node *codec, @@ -121,10 +158,6 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, struct device *dev = simple_priv_to_dev(priv); struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link); struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link); - struct asoc_simple_dai *dai; - struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0); - struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0); - struct snd_soc_dai_link_component *platforms = asoc_link_to_platform(dai_link, 0); struct device_node *top = dev->of_node; struct device_node *node = of_get_parent(np); char *prefix = ""; @@ -132,13 +165,13 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, dev_dbg(dev, "link_of DPCM (%pOF)\n", np); - li->link++; - /* For single DAI link & old style of DT node */ if (is_top) prefix = PREFIX; if (li->cpu) { + struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0); + struct snd_soc_dai_link_component *platforms = asoc_link_to_platform(dai_link, 0); int is_single_links = 0; /* Codec is dummy */ @@ -147,13 +180,7 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, dai_link->dynamic = 1; dai_link->dpcm_merged_format = 1; - dai = simple_props_to_dai_cpu(dai_props, 0); - - ret = asoc_simple_parse_dai(np, cpus, &is_single_links); - if (ret) - goto out_put_node; - - ret = asoc_simple_parse_clk(dev, np, dai, cpus); + ret = simple_parse_node(priv, np, li, prefix, &is_single_links); if (ret < 0) goto out_put_node; @@ -166,6 +193,7 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, asoc_simple_canonicalize_cpu(cpus, is_single_links); asoc_simple_canonicalize_platform(platforms, cpus); } else { + struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0); struct snd_soc_codec_conf *cconf; /* CPU is dummy */ @@ -174,14 +202,9 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, dai_link->no_pcm = 1; dai_link->be_hw_params_fixup = asoc_simple_be_hw_params_fixup; - dai = simple_props_to_dai_codec(dai_props, 0); cconf = simple_props_to_codec_conf(dai_props, 0); - ret = asoc_simple_parse_dai(np, codecs, NULL); - if (ret < 0) - goto out_put_node; - - ret = asoc_simple_parse_clk(dev, np, dai, codecs); + ret = simple_parse_node(priv, np, li, prefix, NULL); if (ret < 0) goto out_put_node; @@ -201,11 +224,6 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, } simple_parse_convert(dev, np, &dai_props->adata); - simple_parse_mclk_fs(top, np, codec, dai_props, prefix); - - ret = asoc_simple_parse_tdm(np, dai); - if (ret) - goto out_put_node; ret = asoc_simple_parse_daifmt(dev, node, codec, prefix, &dai_link->dai_fmt); @@ -218,6 +236,8 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, dai_link->init = asoc_simple_dai_init; out_put_node: + li->link++; + of_node_put(node); return ret; } @@ -230,13 +250,9 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv, { struct device *dev = simple_priv_to_dev(priv); struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link); - struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link); - struct asoc_simple_dai *cpu_dai = simple_props_to_dai_cpu(dai_props, 0); - struct asoc_simple_dai *codec_dai = simple_props_to_dai_codec(dai_props, 0); struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0); struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0); struct snd_soc_dai_link_component *platforms = asoc_link_to_platform(dai_link, 0); - struct device_node *top = dev->of_node; struct device_node *cpu = NULL; struct device_node *node = NULL; struct device_node *plat = NULL; @@ -246,7 +262,6 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv, cpu = np; node = of_get_parent(np); - li->link++; dev_dbg(dev, "link_of (%pOF)\n", node); @@ -262,13 +277,11 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv, if (ret < 0) goto dai_link_of_err; - simple_parse_mclk_fs(top, cpu, codec, dai_props, prefix); - - ret = asoc_simple_parse_dai(cpu, cpus, &single_cpu); + ret = simple_parse_node(priv, cpu, li, prefix, &single_cpu); if (ret < 0) goto dai_link_of_err; - ret = asoc_simple_parse_dai(codec, codecs, NULL); + ret = simple_parse_node(priv, codec, li, prefix, NULL); if (ret < 0) goto dai_link_of_err; @@ -276,22 +289,6 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv, if (ret < 0) goto dai_link_of_err; - ret = asoc_simple_parse_tdm(cpu, cpu_dai); - if (ret < 0) - goto dai_link_of_err; - - ret = asoc_simple_parse_tdm(codec, codec_dai); - if (ret < 0) - goto dai_link_of_err; - - ret = asoc_simple_parse_clk(dev, cpu, cpu_dai, cpus); - if (ret < 0) - goto dai_link_of_err; - - ret = asoc_simple_parse_clk(dev, codec, codec_dai, codecs); - if (ret < 0) - goto dai_link_of_err; - ret = asoc_simple_set_dailink_name(dev, dai_link, "%s-%s", cpus->dai_name, @@ -309,6 +306,8 @@ dai_link_of_err: of_node_put(plat); of_node_put(node); + li->link++; + return ret; } -- GitLab From 6ad76b573bb63ef229cf60386cc38c6e7c7625d7 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 11 May 2021 10:17:47 +0900 Subject: [PATCH 0343/3804] ASoC: simple-card: add simple_link_init() Original commit 434392271afcff350fe ("ASoC: simple-card: add simple_link_init()") are rejected, and this is remake version of it. This patch adds simple_link_init() and share dai_link setting code. Reported-by: "kernelci.org bot" Fixes: 25c4a9b614f101bb9f3 ("ASoC: simple-card: Fix breakage on kontron-sl28-var3-ads2") Fixes: 434392271afcff350fe ("ASoC: simple-card: add simple_link_init()") Signed-off-by: Kuninori Morimoto Tested-by: Michael Walle Link: https://lore.kernel.org/r/87fsyuax1g.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/generic/simple-card.c | 61 ++++++++++++++++----------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index 57ab89be1b4b7..0015f534d42d9 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -149,6 +149,27 @@ static int simple_parse_node(struct asoc_simple_priv *priv, return 0; } +static int simple_link_init(struct asoc_simple_priv *priv, + struct device_node *node, + struct device_node *codec, + struct link_info *li, + char *prefix, char *name) +{ + struct device *dev = simple_priv_to_dev(priv); + struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link); + int ret; + + ret = asoc_simple_parse_daifmt(dev, node, codec, + prefix, &dai_link->dai_fmt); + if (ret < 0) + return 0; + + dai_link->init = asoc_simple_dai_init; + dai_link->ops = &simple_ops; + + return asoc_simple_set_dailink_name(dev, dai_link, name); +} + static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, struct device_node *np, struct device_node *codec, @@ -161,6 +182,7 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, struct device_node *top = dev->of_node; struct device_node *node = of_get_parent(np); char *prefix = ""; + char dai_name[64]; int ret; dev_dbg(dev, "link_of DPCM (%pOF)\n", np); @@ -184,11 +206,7 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, if (ret < 0) goto out_put_node; - ret = asoc_simple_set_dailink_name(dev, dai_link, - "fe.%s", - cpus->dai_name); - if (ret < 0) - goto out_put_node; + snprintf(dai_name, sizeof(dai_name), "fe.%s", cpus->dai_name); asoc_simple_canonicalize_cpu(cpus, is_single_links); asoc_simple_canonicalize_platform(platforms, cpus); @@ -208,11 +226,7 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, if (ret < 0) goto out_put_node; - ret = asoc_simple_set_dailink_name(dev, dai_link, - "be.%s", - codecs->dai_name); - if (ret < 0) - goto out_put_node; + snprintf(dai_name, sizeof(dai_name), "be.%s", codecs->dai_name); /* check "prefix" from top node */ snd_soc_of_parse_node_prefix(top, cconf, codecs->of_node, @@ -225,15 +239,9 @@ static int simple_dai_link_of_dpcm(struct asoc_simple_priv *priv, simple_parse_convert(dev, np, &dai_props->adata); - ret = asoc_simple_parse_daifmt(dev, node, codec, - prefix, &dai_link->dai_fmt); - if (ret < 0) - goto out_put_node; - snd_soc_dai_link_set_capabilities(dai_link); - dai_link->ops = &simple_ops; - dai_link->init = asoc_simple_dai_init; + ret = simple_link_init(priv, node, codec, li, prefix, dai_name); out_put_node: li->link++; @@ -256,6 +264,7 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv, struct device_node *cpu = NULL; struct device_node *node = NULL; struct device_node *plat = NULL; + char dai_name[64]; char prop[128]; char *prefix = ""; int ret, single_cpu = 0; @@ -272,11 +281,6 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv, snprintf(prop, sizeof(prop), "%splat", prefix); plat = of_get_child_by_name(node, prop); - ret = asoc_simple_parse_daifmt(dev, node, codec, - prefix, &dai_link->dai_fmt); - if (ret < 0) - goto dai_link_of_err; - ret = simple_parse_node(priv, cpu, li, prefix, &single_cpu); if (ret < 0) goto dai_link_of_err; @@ -289,19 +293,14 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv, if (ret < 0) goto dai_link_of_err; - ret = asoc_simple_set_dailink_name(dev, dai_link, - "%s-%s", - cpus->dai_name, - codecs->dai_name); - if (ret < 0) - goto dai_link_of_err; - - dai_link->ops = &simple_ops; - dai_link->init = asoc_simple_dai_init; + snprintf(dai_name, sizeof(dai_name), + "%s-%s", cpus->dai_name, codecs->dai_name); asoc_simple_canonicalize_cpu(cpus, single_cpu); asoc_simple_canonicalize_platform(platforms, cpus); + ret = simple_link_init(priv, node, codec, li, prefix, dai_name); + dai_link_of_err: of_node_put(plat); of_node_put(node); -- GitLab From 28c268d3acdd4cbcd2ac320b85609e77f84e74a7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 8 May 2021 17:01:45 +0200 Subject: [PATCH 0344/3804] ASoC: Intel: bytcr_rt5640: Add quirk for the Glavey TM800A550L tablet Add a quirk for the Glavey TM800A550L tablet, this BYTCR tablet has no CHAN package in its ACPI tables and uses SSP0-AIF1 rather then SSP0-AIF2 which is the default for BYTCR devices. Signed-off-by: Hans de Goede Acked-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/20210508150146.28403-1-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index df2f5d55e8ffe..b42fa292d408a 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -574,6 +574,17 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF1 | BYT_RT5640_MCLK_EN), }, + { /* Glavey TM800A550L */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"), + /* Above strings are too generic, also match on BIOS version */ + DMI_MATCH(DMI_BIOS_VERSION, "ZY-8-BI-PX4S70VTR400-X423B-005-D"), + }, + .driver_data = (void *)(BYTCR_INPUT_DEFAULTS | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), -- GitLab From f0353e1f53f92f7b3da91e6669f5d58ee222ebe8 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 8 May 2021 17:01:46 +0200 Subject: [PATCH 0345/3804] ASoC: Intel: bytcr_rt5640: Add quirk for the Lenovo Miix 3-830 tablet The Lenovo Miix 3-830 tablet has only 1 speaker, has an internal analog mic on IN1 and uses JD2 for jack-detect, add a quirk to automatically apply these settings on Lenovo Miix 3-830 tablets. Signed-off-by: Hans de Goede Link: https://lore.kernel.org/r/20210508150146.28403-2-hdegoede@redhat.com Signed-off-by: Mark Brown --- sound/soc/intel/boards/bytcr_rt5640.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index b42fa292d408a..22dbd9d93c1ef 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -663,6 +663,20 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_MONO_SPEAKER | BYT_RT5640_MCLK_EN), }, + { /* Lenovo Miix 3-830 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 3-830"), + }, + .driver_data = (void *)(BYT_RT5640_IN1_MAP | + BYT_RT5640_JD_SRC_JD2_IN4N | + BYT_RT5640_OVCD_TH_2000UA | + BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_MONO_SPEAKER | + BYT_RT5640_DIFF_MIC | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { /* Linx Linx7 tablet */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LINX"), -- GitLab From 40b82c2d9a78593201a3a62dc9239d6405334561 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 16:12:11 +0300 Subject: [PATCH 0346/3804] spi: Use SPI_MODE_X_MASK Use SPI_MODE_X_MASK instead of open coded variant. While at it, fix format specifier and drop explicit casting. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510131217.49357-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 407420977a739..956dce3aafcad 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -3441,8 +3441,8 @@ int spi_setup(struct spi_device *spi) spi_set_thread_rt(spi->controller); } - dev_dbg(&spi->dev, "setup mode %d, %s%s%s%s%u bits/w, %u Hz max --> %d\n", - (int) (spi->mode & (SPI_CPOL | SPI_CPHA)), + dev_dbg(&spi->dev, "setup mode %lu, %s%s%s%s%u bits/w, %u Hz max --> %d\n", + spi->mode & SPI_MODE_X_MASK, (spi->mode & SPI_CS_HIGH) ? "cs_high, " : "", (spi->mode & SPI_LSB_FIRST) ? "lsb, " : "", (spi->mode & SPI_3WIRE) ? "3wire, " : "", -- GitLab From dd507b5ec7ba44ab51e1a8404d04e815a91b472f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 16:12:12 +0300 Subject: [PATCH 0347/3804] spi: spidev: Use SPI_MODE_X_MASK Use SPI_MODE_X_MASK instead of open coded variant. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510131217.49357-2-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spidev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index f56e0e975a469..24e9469ea35bb 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -59,7 +59,7 @@ static DECLARE_BITMAP(minors, N_SPI_MINORS); * * REVISIT should changing those flags be privileged? */ -#define SPI_MODE_MASK (SPI_CPHA | SPI_CPOL | SPI_CS_HIGH \ +#define SPI_MODE_MASK (SPI_MODE_X_MASK | SPI_CS_HIGH \ | SPI_LSB_FIRST | SPI_3WIRE | SPI_LOOP \ | SPI_NO_CS | SPI_READY | SPI_TX_DUAL \ | SPI_TX_QUAD | SPI_TX_OCTAL | SPI_RX_DUAL \ -- GitLab From 56f47edf33fb55ab9381f61d60cf34c7578f3d75 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 16:12:13 +0300 Subject: [PATCH 0348/3804] spi: npcm-pspi: Use SPI_MODE_X_MASK Use SPI_MODE_X_MASK instead of open coded variant. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510131217.49357-3-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-npcm-pspi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-npcm-pspi.c b/drivers/spi/spi-npcm-pspi.c index 56d10c4511db4..1668a347e003d 100644 --- a/drivers/spi/spi-npcm-pspi.c +++ b/drivers/spi/spi-npcm-pspi.c @@ -105,7 +105,7 @@ static void npcm_pspi_set_mode(struct spi_device *spi) u16 regtemp; u16 mode_val; - switch (spi->mode & (SPI_CPOL | SPI_CPHA)) { + switch (spi->mode & SPI_MODE_X_MASK) { case SPI_MODE_0: mode_val = 0; break; -- GitLab From a2f2db6b2a8708f6ac592a362e34fb330f874cea Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 16:12:14 +0300 Subject: [PATCH 0349/3804] spi: oc-tiny: Use SPI_MODE_X_MASK Use SPI_MODE_X_MASK instead of open coded variant. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510131217.49357-4-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-oc-tiny.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-oc-tiny.c b/drivers/spi/spi-oc-tiny.c index f3843f0ff2607..38c14c4e4e212 100644 --- a/drivers/spi/spi-oc-tiny.c +++ b/drivers/spi/spi-oc-tiny.c @@ -86,7 +86,7 @@ static int tiny_spi_setup(struct spi_device *spi) hw->speed_hz = spi->max_speed_hz; hw->baud = tiny_spi_baud(spi, hw->speed_hz); } - hw->mode = spi->mode & (SPI_CPOL | SPI_CPHA); + hw->mode = spi->mode & SPI_MODE_X_MASK; return 0; } -- GitLab From fdb217a38808e041f6eca8c550f1b5981e401a45 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 16:12:15 +0300 Subject: [PATCH 0350/3804] spi: omap-uwire: Use SPI_MODE_X_MASK Use SPI_MODE_X_MASK instead of open coded variant. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510131217.49357-5-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-omap-uwire.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-omap-uwire.c b/drivers/spi/spi-omap-uwire.c index 71402f71ddd85..ceb479f5c88fe 100644 --- a/drivers/spi/spi-omap-uwire.c +++ b/drivers/spi/spi-omap-uwire.c @@ -330,7 +330,7 @@ static int uwire_setup_transfer(struct spi_device *spi, struct spi_transfer *t) if (spi->mode & SPI_CPOL) flags |= UWIRE_CLK_INVERTED; - switch (spi->mode & (SPI_CPOL | SPI_CPHA)) { + switch (spi->mode & SPI_MODE_X_MASK) { case SPI_MODE_0: case SPI_MODE_3: flags |= UWIRE_WRITE_FALLING_EDGE | UWIRE_READ_RISING_EDGE; -- GitLab From 4ccf05579b9d0f15443a0edc860e2be7472ccfc1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 16:12:16 +0300 Subject: [PATCH 0351/3804] spi: ppc4xx: Use SPI_MODE_X_MASK Use SPI_MODE_X_MASK instead of open coded variant. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510131217.49357-6-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-ppc4xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-ppc4xx.c b/drivers/spi/spi-ppc4xx.c index 76874a7cca9b5..59d201acbb394 100644 --- a/drivers/spi/spi-ppc4xx.c +++ b/drivers/spi/spi-ppc4xx.c @@ -235,7 +235,7 @@ static int spi_ppc4xx_setup(struct spi_device *spi) */ cs->mode = SPI_PPC4XX_MODE_SPE; - switch (spi->mode & (SPI_CPHA | SPI_CPOL)) { + switch (spi->mode & SPI_MODE_X_MASK) { case SPI_MODE_0: cs->mode |= SPI_CLK_MODE0; break; -- GitLab From 038b9de42269f33aca3e3741214c863a4e9328d0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 16:12:17 +0300 Subject: [PATCH 0352/3804] spi: uniphier: Use SPI_MODE_X_MASK Use SPI_MODE_X_MASK instead of open coded variant. Signed-off-by: Andy Shevchenko Link: https://lore.kernel.org/r/20210510131217.49357-7-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi-uniphier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/spi/spi-uniphier.c b/drivers/spi/spi-uniphier.c index 6a9ef8ee3cc90..8900e51e1a1cc 100644 --- a/drivers/spi/spi-uniphier.c +++ b/drivers/spi/spi-uniphier.c @@ -142,7 +142,7 @@ static void uniphier_spi_set_mode(struct spi_device *spi) * FSTRT start frame timing * 0: rising edge of clock, 1: falling edge of clock */ - switch (spi->mode & (SPI_CPOL | SPI_CPHA)) { + switch (spi->mode & SPI_MODE_X_MASK) { case SPI_MODE_0: /* CKPHS=1, CKINIT=0, CKDLY=1, FSTRT=0 */ val1 = SSI_CKS_CKPHS | SSI_CKS_CKDLY; -- GitLab From f8090ffc91ffd788a73d4e6b5ca3107c94d9ec27 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 11 May 2021 10:17:57 +0900 Subject: [PATCH 0353/3804] ASoC: audio-graph: tidyup graph_dai_link_of_dpcm() Use local variable at local area only. Signed-off-by: Kuninori Morimoto Tested-by: Michael Walle Link: https://lore.kernel.org/r/87eeeeax16.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/generic/audio-graph-card.c | 30 +++++++++++++--------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c index 2c8a2fcb7922a..0159a4576e9c1 100644 --- a/sound/soc/generic/audio-graph-card.c +++ b/sound/soc/generic/audio-graph-card.c @@ -276,24 +276,19 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, struct link_info *li) { struct device *dev = simple_priv_to_dev(priv); - struct snd_soc_card *card = simple_priv_to_card(priv); struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link); struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link); struct device_node *top = dev->of_node; struct device_node *ep = li->cpu ? cpu_ep : codec_ep; - struct device_node *port; - struct device_node *ports; - struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0); - struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0); char dai_name[64]; int ret; - port = of_get_parent(ep); - ports = of_get_parent(port); - dev_dbg(dev, "link_of DPCM (%pOF)\n", ep); if (li->cpu) { + struct snd_soc_card *card = simple_priv_to_card(priv); + struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0); + /* Codec is dummy */ /* FE settings */ @@ -302,7 +297,7 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, ret = graph_parse_node(priv, cpu_ep, li, 1); if (ret) - goto out_put_node; + return ret; snprintf(dai_name, sizeof(dai_name), "fe.%pOFP.%s", cpus->of_node, cpus->dai_name); @@ -319,7 +314,10 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, if (card->component_chaining && !soc_component_is_pcm(cpus)) dai_link->no_pcm = 1; } else { - struct snd_soc_codec_conf *cconf; + struct snd_soc_codec_conf *cconf = simple_props_to_codec_conf(dai_props, 0); + struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0); + struct device_node *port; + struct device_node *ports; /* CPU is dummy */ @@ -327,22 +325,25 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, dai_link->no_pcm = 1; dai_link->be_hw_params_fixup = asoc_simple_be_hw_params_fixup; - cconf = simple_props_to_codec_conf(dai_props, 0); - ret = graph_parse_node(priv, codec_ep, li, 0); if (ret < 0) - goto out_put_node; + return ret; snprintf(dai_name, sizeof(dai_name), "be.%pOFP.%s", codecs->of_node, codecs->dai_name); /* check "prefix" from top node */ + port = of_get_parent(ep); + ports = of_get_parent(port); snd_soc_of_parse_node_prefix(top, cconf, codecs->of_node, "prefix"); if (of_node_name_eq(ports, "ports")) snd_soc_of_parse_node_prefix(ports, cconf, codecs->of_node, "prefix"); snd_soc_of_parse_node_prefix(port, cconf, codecs->of_node, "prefix"); + + of_node_put(ports); + of_node_put(port); } graph_parse_convert(dev, ep, &dai_props->adata); @@ -351,11 +352,8 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, ret = graph_link_init(priv, cpu_ep, codec_ep, li, dai_name); -out_put_node: li->link++; - of_node_put(ports); - of_node_put(port); return ret; } -- GitLab From 582f3503f96543f3afbaaaa085755fd167a0f71e Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 11 May 2021 10:18:48 +0900 Subject: [PATCH 0354/3804] ASoC: audio-graph: tidyup graph_parse_node() audio-graph is using cpus->dai_name / codecs->dai_name for dailink->name. In graph_parse_node(), xxx->dai_name is got by snd_soc_get_dai_name(), but it might be removed soon by asoc_simple_canonicalize_cpu(). The order should be *1) call snd_soc_get_dai_name() 2) create dailink name *3) call asoc_simple_canonicalize_cpu() * are implemented in graph_parse_node(). This patch remove 3) from graph_parse_node() Reported-by: "kernelci.org bot" Fixes: 8859f809c7d5813 ("ASoC: audio-graph: add graph_parse_node()") Fixes: e51237b8d305225 ("ASoC: audio-graph: add graph_link_init()") Signed-off-by: Kuninori Morimoto Tested-by: Michael Walle Link: https://lore.kernel.org/r/87cztyawzr.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- sound/soc/generic/audio-graph-card.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c index 0159a4576e9c1..5e71382467e88 100644 --- a/sound/soc/generic/audio-graph-card.c +++ b/sound/soc/generic/audio-graph-card.c @@ -209,7 +209,7 @@ static void graph_parse_mclk_fs(struct device_node *top, static int graph_parse_node(struct asoc_simple_priv *priv, struct device_node *ep, struct link_info *li, - int is_cpu) + int *cpu) { struct device *dev = simple_priv_to_dev(priv); struct device_node *top = dev->of_node; @@ -217,9 +217,9 @@ static int graph_parse_node(struct asoc_simple_priv *priv, struct simple_dai_props *dai_props = simple_priv_to_props(priv, li->link); struct snd_soc_dai_link_component *dlc; struct asoc_simple_dai *dai; - int ret, single = 0; + int ret; - if (is_cpu) { + if (cpu) { dlc = asoc_link_to_cpu(dai_link, 0); dai = simple_props_to_dai_cpu(dai_props, 0); } else { @@ -229,7 +229,7 @@ static int graph_parse_node(struct asoc_simple_priv *priv, graph_parse_mclk_fs(top, ep, dai_props); - ret = asoc_simple_parse_dai(ep, dlc, &single); + ret = asoc_simple_parse_dai(ep, dlc, cpu); if (ret < 0) return ret; @@ -241,9 +241,6 @@ static int graph_parse_node(struct asoc_simple_priv *priv, if (ret < 0) return ret; - if (is_cpu) - asoc_simple_canonicalize_cpu(dlc, single); - return 0; } @@ -288,6 +285,7 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, if (li->cpu) { struct snd_soc_card *card = simple_priv_to_card(priv); struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0); + int is_single_links = 0; /* Codec is dummy */ @@ -295,7 +293,7 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, dai_link->dynamic = 1; dai_link->dpcm_merged_format = 1; - ret = graph_parse_node(priv, cpu_ep, li, 1); + ret = graph_parse_node(priv, cpu_ep, li, &is_single_links); if (ret) return ret; @@ -313,6 +311,8 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, */ if (card->component_chaining && !soc_component_is_pcm(cpus)) dai_link->no_pcm = 1; + + asoc_simple_canonicalize_cpu(cpus, is_single_links); } else { struct snd_soc_codec_conf *cconf = simple_props_to_codec_conf(dai_props, 0); struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0); @@ -325,7 +325,7 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv, dai_link->no_pcm = 1; dai_link->be_hw_params_fixup = asoc_simple_be_hw_params_fixup; - ret = graph_parse_node(priv, codec_ep, li, 0); + ret = graph_parse_node(priv, codec_ep, li, NULL); if (ret < 0) return ret; @@ -367,20 +367,23 @@ static int graph_dai_link_of(struct asoc_simple_priv *priv, struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0); struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0); char dai_name[64]; - int ret; + int ret, is_single_links = 0; dev_dbg(dev, "link_of (%pOF)\n", cpu_ep); - ret = graph_parse_node(priv, cpu_ep, li, 1); + ret = graph_parse_node(priv, cpu_ep, li, &is_single_links); if (ret < 0) return ret; - ret = graph_parse_node(priv, codec_ep, li, 0); + ret = graph_parse_node(priv, codec_ep, li, NULL); if (ret < 0) return ret; snprintf(dai_name, sizeof(dai_name), "%s-%s", cpus->dai_name, codecs->dai_name); + + asoc_simple_canonicalize_cpu(cpus, is_single_links); + ret = graph_link_init(priv, cpu_ep, codec_ep, li, dai_name); if (ret < 0) return ret; -- GitLab From 0fad605fb0bdc00d8ad78696300ff2fbdee6e048 Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 11 May 2021 14:28:55 +0100 Subject: [PATCH 0355/3804] ASoC: cs42l42: Regmap must use_single_read/write cs42l42 does not support standard burst transfers so the use_single_read and use_single_write flags must be set in the regmap config. Because of this bug, the patch: commit 0a0eb567e1d4 ("ASoC: cs42l42: Minor error paths fixups") broke cs42l42 probe() because without the use_single_* flags it causes regmap to issue a burst read. However, the missing use_single_* could cause problems anyway because the regmap cache can attempt burst transfers if these flags are not set. Fixes: 2c394ca79604 ("ASoC: Add support for CS42L42 codec") Signed-off-by: Richard Fitzgerald Acked-by: Charles Keepax Link: https://lore.kernel.org/r/20210511132855.27159-1-rf@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l42.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index bf982e145e945..77473c226f9ec 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -399,6 +399,9 @@ static const struct regmap_config cs42l42_regmap = { .reg_defaults = cs42l42_reg_defaults, .num_reg_defaults = ARRAY_SIZE(cs42l42_reg_defaults), .cache_type = REGCACHE_RBTREE, + + .use_single_read = true, + .use_single_write = true, }; static DECLARE_TLV_DB_SCALE(adc_tlv, -9600, 100, false); -- GitLab From 29dd19e3ac7b2a8671ebeac02859232ce0e34f58 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 11 May 2021 17:03:21 +0200 Subject: [PATCH 0356/3804] media: exynos4-is: remove a now unused integer The usage of pm_runtime_resume_and_get() removed the need of a temporary integer. So, drop it. Fixes: 59f96244af94 ("media: exynos4-is: fix pm_runtime_get_sync() usage count") Reported-by: Stephen Rothwell Signed-off-by: Mauro Carvalho Chehab --- drivers/media/platform/exynos4-is/media-dev.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/media/platform/exynos4-is/media-dev.c b/drivers/media/platform/exynos4-is/media-dev.c index e025178db06c3..3b8a24bb724c8 100644 --- a/drivers/media/platform/exynos4-is/media-dev.c +++ b/drivers/media/platform/exynos4-is/media-dev.c @@ -1284,7 +1284,6 @@ static DEVICE_ATTR(subdev_conf_mode, S_IWUSR | S_IRUGO, static int cam_clk_prepare(struct clk_hw *hw) { struct cam_clk *camclk = to_cam_clk(hw); - int ret; if (camclk->fmd->pmf == NULL) return -ENODEV; -- GitLab From 2ee4c8a268764e751ee44dfffa76c813cfc27aee Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 11 May 2021 16:00:03 +0100 Subject: [PATCH 0357/3804] MAINTAINERS: Add Krzysztof as PCI host/endpoint controllers reviewer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Krzysztof has been carrying out PCI patches review for a long time and he has been instrumental in driving PCI host/endpoint controller drivers improvements. Make his role official. Link: https://lore.kernel.org/r/20210511150003.1592-1-lorenzo.pieralisi@arm.com Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Cc: Kishon Vijay Abraham I Cc: Krzysztof Wilczyński --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index bd7aff0c120f2..9755bf97658d6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14110,6 +14110,7 @@ F: drivers/pci/controller/pci-v3-semi.c PCI ENDPOINT SUBSYSTEM M: Kishon Vijay Abraham I M: Lorenzo Pieralisi +R: Krzysztof Wilczyński L: linux-pci@vger.kernel.org S: Supported F: Documentation/PCI/endpoint/* @@ -14158,6 +14159,7 @@ F: drivers/pci/controller/pci-xgene-msi.c PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS M: Lorenzo Pieralisi R: Rob Herring +R: Krzysztof Wilczyński L: linux-pci@vger.kernel.org S: Supported Q: http://patchwork.ozlabs.org/project/linux-pci/list/ -- GitLab From 5e1f689913a4498e3081093670ef9d85b2c60920 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 29 Apr 2021 14:18:53 +0200 Subject: [PATCH 0358/3804] nvme-multipath: fix double initialization of ANA state nvme_init_identify and thus nvme_mpath_init can be called multiple times and thus must not overwrite potentially initialized or in-use fields. Split out a helper for the basic initialization when the controller is initialized and make sure the init_identify path does not blindly change in-use data structures. Fixes: 0d0b660f214d ("nvme: add ANA support") Reported-by: Martin Wilck Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke --- drivers/nvme/host/core.c | 3 +- drivers/nvme/host/multipath.c | 55 ++++++++++++++++++----------------- drivers/nvme/host/nvme.h | 8 +++-- 3 files changed, 37 insertions(+), 29 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 522c9b229f80e..762125f2905f7 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2901,7 +2901,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->hmmaxd = le16_to_cpu(id->hmmaxd); } - ret = nvme_mpath_init(ctrl, id); + ret = nvme_mpath_init_identify(ctrl, id); if (ret < 0) goto out_free; @@ -4364,6 +4364,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, min(default_ps_max_latency_us, (unsigned long)S32_MAX)); nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device)); + nvme_mpath_init_ctrl(ctrl); return 0; out_free_name: diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 0551796517e61..deb14562c96ae 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -781,9 +781,18 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) put_disk(head->disk); } -int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) +void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl) { - int error; + mutex_init(&ctrl->ana_lock); + timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0); + INIT_WORK(&ctrl->ana_work, nvme_ana_work); +} + +int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) +{ + size_t max_transfer_size = ctrl->max_hw_sectors << SECTOR_SHIFT; + size_t ana_log_size; + int error = 0; /* check if multipath is enabled and we have the capability */ if (!multipath || !ctrl->subsys || @@ -795,37 +804,31 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) ctrl->nanagrpid = le32_to_cpu(id->nanagrpid); ctrl->anagrpmax = le32_to_cpu(id->anagrpmax); - mutex_init(&ctrl->ana_lock); - timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0); - ctrl->ana_log_size = sizeof(struct nvme_ana_rsp_hdr) + - ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc); - ctrl->ana_log_size += ctrl->max_namespaces * sizeof(__le32); - - if (ctrl->ana_log_size > ctrl->max_hw_sectors << SECTOR_SHIFT) { + ana_log_size = sizeof(struct nvme_ana_rsp_hdr) + + ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc) + + ctrl->max_namespaces * sizeof(__le32); + if (ana_log_size > max_transfer_size) { dev_err(ctrl->device, - "ANA log page size (%zd) larger than MDTS (%d).\n", - ctrl->ana_log_size, - ctrl->max_hw_sectors << SECTOR_SHIFT); + "ANA log page size (%zd) larger than MDTS (%zd).\n", + ana_log_size, max_transfer_size); dev_err(ctrl->device, "disabling ANA support.\n"); - return 0; + goto out_uninit; } - - INIT_WORK(&ctrl->ana_work, nvme_ana_work); - kfree(ctrl->ana_log_buf); - ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL); - if (!ctrl->ana_log_buf) { - error = -ENOMEM; - goto out; + if (ana_log_size > ctrl->ana_log_size) { + nvme_mpath_stop(ctrl); + kfree(ctrl->ana_log_buf); + ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL); + if (!ctrl->ana_log_buf) + return -ENOMEM; } - + ctrl->ana_log_size = ana_log_size; error = nvme_read_ana_log(ctrl); if (error) - goto out_free_ana_log_buf; + goto out_uninit; return 0; -out_free_ana_log_buf: - kfree(ctrl->ana_log_buf); - ctrl->ana_log_buf = NULL; -out: + +out_uninit: + nvme_mpath_uninit(ctrl); return error; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 05f31a2c64bb2..0015860ec12bf 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -712,7 +712,8 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); void nvme_mpath_remove_disk(struct nvme_ns_head *head); -int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); +int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); +void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl); void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); @@ -780,7 +781,10 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns) static inline void nvme_trace_bio_complete(struct request *req) { } -static inline int nvme_mpath_init(struct nvme_ctrl *ctrl, +static inline void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl) +{ +} +static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { if (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA) -- GitLab From 608a969046e6e0567d05a166be66c77d2dd8220b Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 6 May 2021 18:51:35 -0700 Subject: [PATCH 0359/3804] nvmet: fix inline bio check for bdev-ns When handling rw commands, for inline bio case we only consider transfer size. This works well when req->sg_cnt fits into the req->inline_bvec, but it will result in the warning in __bio_add_page() when req->sg_cnt > NVMET_MAX_INLINE_BVEC. Consider an I/O size 32768 and first page is not aligned to the page boundary, then I/O is split in following manner :- [ 2206.256140] nvmet: sg->length 3440 sg->offset 656 [ 2206.256144] nvmet: sg->length 4096 sg->offset 0 [ 2206.256148] nvmet: sg->length 4096 sg->offset 0 [ 2206.256152] nvmet: sg->length 4096 sg->offset 0 [ 2206.256155] nvmet: sg->length 4096 sg->offset 0 [ 2206.256159] nvmet: sg->length 4096 sg->offset 0 [ 2206.256163] nvmet: sg->length 4096 sg->offset 0 [ 2206.256166] nvmet: sg->length 4096 sg->offset 0 [ 2206.256170] nvmet: sg->length 656 sg->offset 0 Now the req->transfer_size == NVMET_MAX_INLINE_DATA_LEN i.e. 32768, but the req->sg_cnt is (9) > NVMET_MAX_INLINE_BIOVEC which is (8). This will result in the following warning message :- nvmet_bdev_execute_rw() bio_add_page() __bio_add_page() WARN_ON_ONCE(bio_full(bio, len)); This scenario is very hard to reproduce on the nvme-loop transport only with rw commands issued with the passthru IOCTL interface from the host application and the data buffer is allocated with the malloc() and not the posix_memalign(). Fixes: 73383adfad24 ("nvmet: don't split large I/Os unconditionally") Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-bdev.c | 2 +- drivers/nvme/target/nvmet.h | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 9a8b3726a37c4..429263ca9b978 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -258,7 +258,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba); - if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) { + if (nvmet_use_inline_bvec(req)) { bio = &req->b.inline_bio; bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); } else { diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 5566ed403576e..d69a409515d65 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -616,4 +616,10 @@ static inline sector_t nvmet_lba_to_sect(struct nvmet_ns *ns, __le64 lba) return le64_to_cpu(lba) << (ns->blksize_shift - SECTOR_SHIFT); } +static inline bool nvmet_use_inline_bvec(struct nvmet_req *req) +{ + return req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN && + req->sg_cnt <= NVMET_MAX_INLINE_BIOVEC; +} + #endif /* _NVMET_H */ -- GitLab From ab96de5def854d8fc51280b6a20597e64b14ac31 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 6 May 2021 18:51:36 -0700 Subject: [PATCH 0360/3804] nvmet: fix inline bio check for passthru When handling passthru commands, for inline bio allocation we only consider the transfer size. This works well when req->sg_cnt fits into the req->inline_bvec, but it will result in the early return from bio_add_hw_page() when req->sg_cnt > NVMET_MAX_INLINE_BVEC. Consider an I/O of size 32768 and first buffer is not aligned to the page boundary, then I/O is split in following manner :- [ 2206.256140] nvmet: sg->length 3440 sg->offset 656 [ 2206.256144] nvmet: sg->length 4096 sg->offset 0 [ 2206.256148] nvmet: sg->length 4096 sg->offset 0 [ 2206.256152] nvmet: sg->length 4096 sg->offset 0 [ 2206.256155] nvmet: sg->length 4096 sg->offset 0 [ 2206.256159] nvmet: sg->length 4096 sg->offset 0 [ 2206.256163] nvmet: sg->length 4096 sg->offset 0 [ 2206.256166] nvmet: sg->length 4096 sg->offset 0 [ 2206.256170] nvmet: sg->length 656 sg->offset 0 Now the req->transfer_size == NVMET_MAX_INLINE_DATA_LEN i.e. 32768, but the req->sg_cnt is (9) > NVMET_MAX_INLINE_BIOVEC which is (8). This will result in early return in the following code path :- nvmet_bdev_execute_rw() bio_add_pc_page() bio_add_hw_page() if (bio_full(bio, len)) return 0; Use previously introduced helper nvmet_use_inline_bvec() to consider req->sg_cnt when using inline bio. This only affects nvme-loop transport. Fixes: dab3902b19a0 ("nvmet: use inline bio for passthru fast path") Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/passthru.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 2798944899b73..39b1473f7204e 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -194,7 +194,7 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) if (req->sg_cnt > BIO_MAX_VECS) return -EINVAL; - if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) { + if (nvmet_use_inline_bvec(req)) { bio = &req->p.inline_bio; bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); } else { -- GitLab From 8cc365f9559b86802afc0208389f5c8d46b4ad61 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Thu, 6 May 2021 10:08:19 +0300 Subject: [PATCH 0361/3804] nvmet-rdma: Fix NULL deref when SEND is completed with error When running some traffic and taking down the link on peer, a retry counter exceeded error is received. This leads to nvmet_rdma_error_comp which tried accessing the cq_context to obtain the queue. The cq_context is no longer valid after the fix to use shared CQ mechanism and should be obtained similar to how it is obtained in other functions from the wc->qp. [ 905.786331] nvmet_rdma: SEND for CQE 0x00000000e3337f90 failed with status transport retry counter exceeded (12). [ 905.832048] BUG: unable to handle kernel NULL pointer dereference at 0000000000000048 [ 905.839919] PGD 0 P4D 0 [ 905.842464] Oops: 0000 1 SMP NOPTI [ 905.846144] CPU: 13 PID: 1557 Comm: kworker/13:1H Kdump: loaded Tainted: G OE --------- - - 4.18.0-304.el8.x86_64 #1 [ 905.872135] RIP: 0010:nvmet_rdma_error_comp+0x5/0x1b [nvmet_rdma] [ 905.878259] Code: 19 4f c0 e8 89 b3 a5 f6 e9 5b e0 ff ff 0f b7 75 14 4c 89 ea 48 c7 c7 08 1a 4f c0 e8 71 b3 a5 f6 e9 4b e0 ff ff 0f 1f 44 00 00 <48> 8b 47 48 48 85 c0 74 08 48 89 c7 e9 98 bf 49 00 e9 c3 e3 ff ff [ 905.897135] RSP: 0018:ffffab601c45fe28 EFLAGS: 00010246 [ 905.902387] RAX: 0000000000000065 RBX: ffff9e729ea2f800 RCX: 0000000000000000 [ 905.909558] RDX: 0000000000000000 RSI: ffff9e72df9567c8 RDI: 0000000000000000 [ 905.916731] RBP: ffff9e729ea2b400 R08: 000000000000074d R09: 0000000000000074 [ 905.923903] R10: 0000000000000000 R11: ffffab601c45fcc0 R12: 0000000000000010 [ 905.931074] R13: 0000000000000000 R14: 0000000000000010 R15: ffff9e729ea2f400 [ 905.938247] FS: 0000000000000000(0000) GS:ffff9e72df940000(0000) knlGS:0000000000000000 [ 905.938249] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 905.950067] nvmet_rdma: SEND for CQE 0x00000000c7356cca failed with status transport retry counter exceeded (12). [ 905.961855] CR2: 0000000000000048 CR3: 000000678d010004 CR4: 00000000007706e0 [ 905.961855] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 905.961856] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 905.961857] PKRU: 55555554 [ 906.010315] Call Trace: [ 906.012778] __ib_process_cq+0x89/0x170 [ib_core] [ 906.017509] ib_cq_poll_work+0x26/0x80 [ib_core] [ 906.022152] process_one_work+0x1a7/0x360 [ 906.026182] ? create_worker+0x1a0/0x1a0 [ 906.030123] worker_thread+0x30/0x390 [ 906.033802] ? create_worker+0x1a0/0x1a0 [ 906.037744] kthread+0x116/0x130 [ 906.040988] ? kthread_flush_work_fn+0x10/0x10 [ 906.045456] ret_from_fork+0x1f/0x40 Fixes: ca0f1a8055be2 ("nvmet-rdma: use new shared CQ mechanism") Signed-off-by: Shai Malin Signed-off-by: Michal Kalderon Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 6c1f3ab7649c7..7d607f435e366 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -700,7 +700,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) { struct nvmet_rdma_rsp *rsp = container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe); - struct nvmet_rdma_queue *queue = cq->cq_context; + struct nvmet_rdma_queue *queue = wc->qp->qp_context; nvmet_rdma_release_rsp(rsp); @@ -786,7 +786,7 @@ static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc) { struct nvmet_rdma_rsp *rsp = container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe); - struct nvmet_rdma_queue *queue = cq->cq_context; + struct nvmet_rdma_queue *queue = wc->qp->qp_context; struct rdma_cm_id *cm_id = rsp->queue->cm_id; u16 status; -- GitLab From 3651aaacd10b2f8cee3780c490fc2df55bd4f543 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 10 May 2021 12:15:36 -0700 Subject: [PATCH 0362/3804] nvmet: demote discovery cmd parse err msg to debug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Host can send invalid commands and flood the target with error messages for the discovery controller. Demote the error message from pr_err() to pr_debug( in nvmet_parse_discovery_cmd().  Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/discovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 4845d12e374ac..fc3645fc2c249 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -379,7 +379,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) req->execute = nvmet_execute_disc_identify; return 0; default: - pr_err("unhandled cmd %d\n", cmd->common.opcode); + pr_debug("unhandled cmd %d\n", cmd->common.opcode); req->error_loc = offsetof(struct nvme_common_command, opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } -- GitLab From 4c2dab2bf5ace0ddc07ca7f04a7ba32fc3b23492 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 10 May 2021 12:15:37 -0700 Subject: [PATCH 0363/3804] nvmet: use helper to remove the duplicate code Use the helper nvmet_report_invalid_opcode() to report invalid opcode so we can remove the duplicate code. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index e7a367cf6d367..dcd49a72f2f3c 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -975,10 +975,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) case nvme_admin_keep_alive: req->execute = nvmet_execute_keep_alive; return 0; + default: + return nvmet_report_invalid_opcode(req); } - - pr_debug("unhandled cmd %d on qid %d\n", cmd->common.opcode, - req->sq->qid); - req->error_loc = offsetof(struct nvme_common_command, opcode); - return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } -- GitLab From 7a4ffd20ec6d31dfde2cc5608851e5109ffed7c9 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 10 May 2021 12:15:38 -0700 Subject: [PATCH 0364/3804] nvmet: demote fabrics cmd parse err msg to debug Host can send invalid commands and flood the target with error messages. Demote the error message from pr_err() to pr_debug() in nvmet_parse_fabrics_cmd() and nvmet_parse_connect_cmd(). Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fabrics-cmd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 1420a8e3e0b10..7d0f3523fdab2 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -94,7 +94,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req) req->execute = nvmet_execute_prop_get; break; default: - pr_err("received unknown capsule type 0x%x\n", + pr_debug("received unknown capsule type 0x%x\n", cmd->fabrics.fctype); req->error_loc = offsetof(struct nvmf_common_command, fctype); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; @@ -284,13 +284,13 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req) struct nvme_command *cmd = req->cmd; if (!nvme_is_fabrics(cmd)) { - pr_err("invalid command 0x%x on unconnected queue.\n", + pr_debug("invalid command 0x%x on unconnected queue.\n", cmd->fabrics.opcode); req->error_loc = offsetof(struct nvme_common_command, opcode); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } if (cmd->fabrics.fctype != nvme_fabrics_type_connect) { - pr_err("invalid capsule type 0x%x on unconnected queue.\n", + pr_debug("invalid capsule type 0x%x on unconnected queue.\n", cmd->fabrics.fctype); req->error_loc = offsetof(struct nvmf_common_command, fctype); return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; -- GitLab From 918d9c77791cc8267b5b5ab556c868dfa57e0d93 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 11 May 2021 17:01:28 +0200 Subject: [PATCH 0365/3804] docs: cdrom-standard.rst: get rid of uneeded UTF-8 chars This file was converted from a LaTeX one. The conversion used some UTF-8 characters at the literal blocks. Replace them by normal ASCII characters. Signed-off-by: Mauro Carvalho Chehab Acked-by: Jens Axboe Link: https://lore.kernel.org/r/79c3f482da17ea48d69b6e6ad1b7fb102b9dd7bf.1620744606.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/cdrom/cdrom-standard.rst | 30 +++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/Documentation/cdrom/cdrom-standard.rst b/Documentation/cdrom/cdrom-standard.rst index 70500b189cc84..5845960ca3821 100644 --- a/Documentation/cdrom/cdrom-standard.rst +++ b/Documentation/cdrom/cdrom-standard.rst @@ -146,18 +146,18 @@ with the kernel as a block device by registering the following general *struct file_operations*:: struct file_operations cdrom_fops = { - NULL, /∗ lseek ∗/ - block _read , /∗ read—general block-dev read ∗/ - block _write, /∗ write—general block-dev write ∗/ - NULL, /∗ readdir ∗/ - NULL, /∗ select ∗/ - cdrom_ioctl, /∗ ioctl ∗/ - NULL, /∗ mmap ∗/ - cdrom_open, /∗ open ∗/ - cdrom_release, /∗ release ∗/ - NULL, /∗ fsync ∗/ - NULL, /∗ fasync ∗/ - NULL /∗ revalidate ∗/ + NULL, /* lseek */ + block _read , /* read--general block-dev read */ + block _write, /* write--general block-dev write */ + NULL, /* readdir */ + NULL, /* select */ + cdrom_ioctl, /* ioctl */ + NULL, /* mmap */ + cdrom_open, /* open */ + cdrom_release, /* release */ + NULL, /* fsync */ + NULL, /* fasync */ + NULL /* revalidate */ }; Every active CD-ROM device shares this *struct*. The routines @@ -250,12 +250,12 @@ The drive-specific, minor-like information that is registered with `cdrom.c`, currently contains the following fields:: struct cdrom_device_info { - const struct cdrom_device_ops * ops; /* device operations for this major */ + const struct cdrom_device_ops * ops; /* device operations for this major */ struct list_head list; /* linked list of all device_info */ struct gendisk * disk; /* matching block layer disk */ void * handle; /* driver-dependent data */ - int mask; /* mask of capability: disables them */ + int mask; /* mask of capability: disables them */ int speed; /* maximum speed for reading data */ int capacity; /* number of discs in a jukebox */ @@ -569,7 +569,7 @@ the *CDC_CLOSE_TRAY* bit in *mask*. In the file `cdrom.c` you will encounter many constructions of the type:: - if (cdo->capability & ∼cdi->mask & CDC _⟨capability⟩) ... + if (cdo->capability & ~cdi->mask & CDC _) ... There is no *ioctl* to set the mask... The reason is that I think it is better to control the **behavior** rather than the -- GitLab From 8d3926c09e043448d4d26896b8225943f12d0933 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 11 May 2021 17:01:29 +0200 Subject: [PATCH 0366/3804] docs: ABI: remove a meaningless UTF-8 character MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Those two files have this character: - U+00ac ('¬'): NOT SIGN at the end of the first line, apparently for no reason. Drop them. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/6cd3f0b47568fecb7889fd18d1d744c3aaf73866.1620744606.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/ABI/obsolete/sysfs-kernel-fadump_registered | 2 +- Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered index 0360be39c98e9..dae880b1a5d5d 100644 --- a/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered +++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered @@ -1,4 +1,4 @@ -This ABI is renamed and moved to a new location /sys/kernel/fadump/registered.¬ +This ABI is renamed and moved to a new location /sys/kernel/fadump/registered. What: /sys/kernel/fadump_registered Date: Feb 2012 diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem index 6ce0b129ab12d..ca2396edb5f10 100644 --- a/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem +++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem @@ -1,4 +1,4 @@ -This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem.¬ +This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem. What: /sys/kernel/fadump_release_mem Date: Feb 2012 -- GitLab From 6f3bceba03b4f18e0b83261e2fb761e0ad5da625 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 11 May 2021 17:01:30 +0200 Subject: [PATCH 0367/3804] docs: ABI: remove some spurious characters The KernelVersion tag contains some spurious UTF-8 characters for no reason. Drop them. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/6d774ad6cb3795a177309503a39f8f1b5e309d64.1620744606.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/ABI/testing/sysfs-module | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/ABI/testing/sysfs-module b/Documentation/ABI/testing/sysfs-module index a485434d2a0fb..88bddf192ceb7 100644 --- a/Documentation/ABI/testing/sysfs-module +++ b/Documentation/ABI/testing/sysfs-module @@ -37,13 +37,13 @@ Description: Maximum time allowed for periodic transfers per microframe (μs) What: /sys/module/*/{coresize,initsize} Date: Jan 2012 -KernelVersion:»·3.3 +KernelVersion: 3.3 Contact: Kay Sievers Description: Module size in bytes. What: /sys/module/*/taint Date: Jan 2012 -KernelVersion:»·3.3 +KernelVersion: 3.3 Contact: Kay Sievers Description: Module taint flags: == ===================== -- GitLab From d1f2722d5357d7a5138b1be8bd64946f0a14c81e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 11 May 2021 17:01:31 +0200 Subject: [PATCH 0368/3804] docs: hwmon: tmp103.rst: fix bad usage of UTF-8 chars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While UTF-8 characters can be used at the Linux documentation, the best is to use them only when ASCII doesn't offer a good replacement. So, replace the occurences of the following UTF-8 characters: - U+2013 ('–'): EN DASH In this specific case, EN DASH was used instead of a minus sign. So, replace it by a single hyphen. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/73b3c7c1eef5c12ddc941624d23689313bd56529.1620744606.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- Documentation/hwmon/tmp103.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/hwmon/tmp103.rst b/Documentation/hwmon/tmp103.rst index e195a7d14309a..b3ef81475cf8b 100644 --- a/Documentation/hwmon/tmp103.rst +++ b/Documentation/hwmon/tmp103.rst @@ -21,10 +21,10 @@ Description The TMP103 is a digital output temperature sensor in a four-ball wafer chip-scale package (WCSP). The TMP103 is capable of reading temperatures to a resolution of 1°C. The TMP103 is specified for -operation over a temperature range of –40°C to +125°C. +operation over a temperature range of -40°C to +125°C. Resolution: 8 Bits -Accuracy: ±1°C Typ (–10°C to +100°C) +Accuracy: ±1°C Typ (-10°C to +100°C) The driver provides the common sysfs-interface for temperatures (see Documentation/hwmon/sysfs-interface.rst under Temperatures). -- GitLab From 5e716ec68b4a75a84e28c0efa68db613deb64981 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Tue, 11 May 2021 17:01:32 +0200 Subject: [PATCH 0369/3804] docs: networking: device_drivers: fix bad usage of UTF-8 chars MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Probably because the original file was pre-processed by some tool, both i40e.rst and iavf.rst files are using this character: - U+2013 ('–'): EN DASH meaning an hyphen when calling a command line application, which is obviously wrong. So, replace them by an hyphen, ensuring that it will be properly displayed as literals when building the documentation. Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/95eb2a48d0ca3528780ce0dfce64359977fa8cb3.1620744606.git.mchehab+huawei@kernel.org Signed-off-by: Jonathan Corbet --- .../networking/device_drivers/ethernet/intel/i40e.rst | 4 ++-- .../networking/device_drivers/ethernet/intel/iavf.rst | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst index 8a9b18573688d..2d3f6bd969a2b 100644 --- a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst +++ b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst @@ -173,7 +173,7 @@ Director rule is added from ethtool (Sideband filter), ATR is turned off by the driver. To re-enable ATR, the sideband can be disabled with the ethtool -K option. For example:: - ethtool –K [adapter] ntuple [off|on] + ethtool -K [adapter] ntuple [off|on] If sideband is re-enabled after ATR is re-enabled, ATR remains enabled until a TCP-IP flow is added. When all TCP-IP sideband rules are deleted, ATR is @@ -688,7 +688,7 @@ shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates. Totals must be equal or less than port speed. For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network -monitoring tools such as ifstat or sar –n DEV [interval] [number of samples] +monitoring tools such as `ifstat` or `sar -n DEV [interval] [number of samples]` 2. Enable HW TC offload on interface:: diff --git a/Documentation/networking/device_drivers/ethernet/intel/iavf.rst b/Documentation/networking/device_drivers/ethernet/intel/iavf.rst index 52e037b11c979..25330b7b5168d 100644 --- a/Documentation/networking/device_drivers/ethernet/intel/iavf.rst +++ b/Documentation/networking/device_drivers/ethernet/intel/iavf.rst @@ -179,7 +179,7 @@ shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates. Totals must be equal or less than port speed. For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network -monitoring tools such as ifstat or sar –n DEV [interval] [number of samples] +monitoring tools such as ``ifstat`` or ``sar -n DEV [interval] [number of samples]`` NOTE: Setting up channels via ethtool (ethtool -L) is not supported when the -- GitLab From 7240cd200541543008a7ce4fcaf2ba5a5556128f Mon Sep 17 00:00:00 2001 From: Desmond Cheong Zhi Xi Date: Tue, 11 May 2021 09:49:37 -0400 Subject: [PATCH 0370/3804] Remove link to nonexistent rocket driver docs The rocket driver and documentation were removed in this commit, but the corresponding entry in index.rst was not removed. Signed-off-by: Desmond Cheong Zhi Xi Fixes: 3b00b6af7a5b ("tty: rocket, remove the driver") Link: https://lore.kernel.org/r/20210511134937.2442291-1-desmondcheongzx@gmail.com Signed-off-by: Jonathan Corbet --- Documentation/driver-api/serial/index.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/driver-api/serial/index.rst b/Documentation/driver-api/serial/index.rst index 21351b8c95a4d..8f7d7af3b90b1 100644 --- a/Documentation/driver-api/serial/index.rst +++ b/Documentation/driver-api/serial/index.rst @@ -19,7 +19,6 @@ Serial drivers moxa-smartio n_gsm - rocket serial-iso7816 serial-rs485 -- GitLab From 965a7d72e798eb7af0aa67210e37cf7ecd1c9cad Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Tue, 11 May 2021 20:02:42 +0200 Subject: [PATCH 0371/3804] mac80211: assure all fragments are encrypted Do not mix plaintext and encrypted fragments in protected Wi-Fi networks. This fixes CVE-2020-26147. Previously, an attacker was able to first forward a legitimate encrypted fragment towards a victim, followed by a plaintext fragment. The encrypted and plaintext fragment would then be reassembled. For further details see Section 6.3 and Appendix D in the paper "Fragment and Forge: Breaking Wi-Fi Through Frame Aggregation and Fragmentation". Because of this change there are now two equivalent conditions in the code to determine if a received fragment requires sequential PNs, so we also move this test to a separate function to make the code easier to maintain. Cc: stable@vger.kernel.org Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20210511200110.30c4394bb835.I5acfdb552cc1d20c339c262315950b3eac491397@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 62047e93e217b..65fc674e27cc0 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2194,6 +2194,16 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, return NULL; } +static bool requires_sequential_pn(struct ieee80211_rx_data *rx, __le16 fc) +{ + return rx->key && + (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP || + rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 || + rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP || + rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) && + ieee80211_has_protected(fc); +} + static ieee80211_rx_result debug_noinline ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) { @@ -2238,12 +2248,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) /* This is the first fragment of a new frame. */ entry = ieee80211_reassemble_add(rx->sdata, frag, seq, rx->seqno_idx, &(rx->skb)); - if (rx->key && - (rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP || - rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP_256 || - rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP || - rx->key->conf.cipher == WLAN_CIPHER_SUITE_GCMP_256) && - ieee80211_has_protected(fc)) { + if (requires_sequential_pn(rx, fc)) { int queue = rx->security_idx; /* Store CCMP/GCMP PN so that we can verify that the @@ -2285,11 +2290,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) u8 pn[IEEE80211_CCMP_PN_LEN], *rpn; int queue; - if (!rx->key || - (rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP && - rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP_256 && - rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP && - rx->key->conf.cipher != WLAN_CIPHER_SUITE_GCMP_256)) + if (!requires_sequential_pn(rx, fc)) return RX_DROP_UNUSABLE; memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN); for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) { -- GitLab From 94034c40ab4a3fcf581fbc7f8fdf4e29943c4a24 Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Tue, 11 May 2021 20:02:43 +0200 Subject: [PATCH 0372/3804] mac80211: prevent mixed key and fragment cache attacks Simultaneously prevent mixed key attacks (CVE-2020-24587) and fragment cache attacks (CVE-2020-24586). This is accomplished by assigning a unique color to every key (per interface) and using this to track which key was used to decrypt a fragment. When reassembling frames, it is now checked whether all fragments were decrypted using the same key. To assure that fragment cache attacks are also prevented, the ID that is assigned to keys is unique even over (re)associations and (re)connects. This means fragments separated by a (re)association or (re)connect will not be reassembled. Because mac80211 now also prevents the reassembly of mixed encrypted and plaintext fragments, all cache attacks are prevented. Cc: stable@vger.kernel.org Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20210511200110.3f8290e59823.I622a67769ed39257327a362cfc09c812320eb979@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/key.c | 7 +++++++ net/mac80211/key.h | 2 ++ net/mac80211/rx.c | 6 ++++++ 4 files changed, 16 insertions(+) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8fcbaa1eedf3e..874ffe7819e53 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -97,6 +97,7 @@ struct ieee80211_fragment_entry { u8 rx_queue; bool check_sequential_pn; /* needed for CCMP/GCMP */ u8 last_pn[6]; /* PN of the last fragment if CCMP was used */ + unsigned int key_color; }; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 56c068cb49c4d..f695fc80088bc 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -799,6 +799,7 @@ int ieee80211_key_link(struct ieee80211_key *key, struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { + static atomic_t key_color = ATOMIC_INIT(0); struct ieee80211_key *old_key; int idx = key->conf.keyidx; bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE; @@ -850,6 +851,12 @@ int ieee80211_key_link(struct ieee80211_key *key, key->sdata = sdata; key->sta = sta; + /* + * Assign a unique ID to every key so we can easily prevent mixed + * key and fragment cache attacks. + */ + key->color = atomic_inc_return(&key_color); + increment_tailroom_need_count(sdata); ret = ieee80211_key_replace(sdata, sta, pairwise, old_key, key); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 7ad72e9b4991d..1e326c89d7217 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -128,6 +128,8 @@ struct ieee80211_key { } debugfs; #endif + unsigned int color; + /* * key config, must be last because it contains key * material as variable length member diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 65fc674e27cc0..531232b91bc49 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2255,6 +2255,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) * next fragment has a sequential PN value. */ entry->check_sequential_pn = true; + entry->key_color = rx->key->color; memcpy(entry->last_pn, rx->key->u.ccmp.rx_pn[queue], IEEE80211_CCMP_PN_LEN); @@ -2292,6 +2293,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (!requires_sequential_pn(rx, fc)) return RX_DROP_UNUSABLE; + + /* Prevent mixed key and fragment cache attacks */ + if (entry->key_color != rx->key->color) + return RX_DROP_UNUSABLE; + memcpy(pn, entry->last_pn, IEEE80211_CCMP_PN_LEN); for (i = IEEE80211_CCMP_PN_LEN - 1; i >= 0; i--) { pn[i]++; -- GitLab From a1d5ff5651ea592c67054233b14b30bf4452999c Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Tue, 11 May 2021 20:02:44 +0200 Subject: [PATCH 0373/3804] mac80211: properly handle A-MSDUs that start with an RFC 1042 header Properly parse A-MSDUs whose first 6 bytes happen to equal a rfc1042 header. This can occur in practice when the destination MAC address equals AA:AA:03:00:00:00. More importantly, this simplifies the next patch to mitigate A-MSDU injection attacks. Cc: stable@vger.kernel.org Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20210511200110.0b2b886492f0.I23dd5d685fe16d3b0ec8106e8f01b59f499dffed@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++-- net/mac80211/rx.c | 2 +- net/wireless/util.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5224f885a99a1..58c2cd417e89a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5760,7 +5760,7 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); */ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, const u8 *addr, enum nl80211_iftype iftype, - u8 data_offset); + u8 data_offset, bool is_amsdu); /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 @@ -5772,7 +5772,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, enum nl80211_iftype iftype) { - return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0); + return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0, false); } /** diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 531232b91bc49..f14d32a5001dd 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2682,7 +2682,7 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset) if (ieee80211_data_to_8023_exthdr(skb, ðhdr, rx->sdata->vif.addr, rx->sdata->vif.type, - data_offset)) + data_offset, true)) return RX_DROP_UNUSABLE; ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr, diff --git a/net/wireless/util.c b/net/wireless/util.c index 382c5262d997d..39966a873e401 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -542,7 +542,7 @@ EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen); int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, const u8 *addr, enum nl80211_iftype iftype, - u8 data_offset) + u8 data_offset, bool is_amsdu) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct { @@ -629,7 +629,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, skb_copy_bits(skb, hdrlen, &payload, sizeof(payload)); tmp.h_proto = payload.proto; - if (likely((ether_addr_equal(payload.hdr, rfc1042_header) && + if (likely((!is_amsdu && ether_addr_equal(payload.hdr, rfc1042_header) && tmp.h_proto != htons(ETH_P_AARP) && tmp.h_proto != htons(ETH_P_IPX)) || ether_addr_equal(payload.hdr, bridge_tunnel_header))) -- GitLab From 2b8a1fee3488c602aca8bea004a087e60806a5cf Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Tue, 11 May 2021 20:02:45 +0200 Subject: [PATCH 0374/3804] cfg80211: mitigate A-MSDU aggregation attacks Mitigate A-MSDU injection attacks (CVE-2020-24588) by detecting if the destination address of a subframe equals an RFC1042 (i.e., LLC/SNAP) header, and if so dropping the complete A-MSDU frame. This mitigates known attacks, although new (unknown) aggregation-based attacks may remain possible. This defense works because in A-MSDU aggregation injection attacks, a normal encrypted Wi-Fi frame is turned into an A-MSDU frame. This means the first 6 bytes of the first A-MSDU subframe correspond to an RFC1042 header. In other words, the destination MAC address of the first A-MSDU subframe contains the start of an RFC1042 header during an aggregation attack. We can detect this and thereby prevent this specific attack. For details, see Section 7.2 of "Fragment and Forge: Breaking Wi-Fi Through Frame Aggregation and Fragmentation". Note that for kernel 4.9 and above this patch depends on "mac80211: properly handle A-MSDUs that start with a rfc1042 header". Otherwise this patch has no impact and attacks will remain possible. Cc: stable@vger.kernel.org Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20210511200110.25d93176ddaf.I9e265b597f2cd23eb44573f35b625947b386a9de@changeid Signed-off-by: Johannes Berg --- net/wireless/util.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/wireless/util.c b/net/wireless/util.c index 39966a873e401..7ec021a610aeb 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -771,6 +771,9 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, remaining = skb->len - offset; if (subframe_len > remaining) goto purge; + /* mitigate A-MSDU aggregation injection attacks */ + if (ether_addr_equal(eth.h_dest, rfc1042_header)) + goto purge; offset += sizeof(struct ethhdr); last = remaining <= subframe_len + padding; -- GitLab From 270032a2a9c4535799736142e1e7c413ca7b836e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 May 2021 20:02:46 +0200 Subject: [PATCH 0375/3804] mac80211: drop A-MSDUs on old ciphers With old ciphers (WEP and TKIP) we shouldn't be using A-MSDUs since A-MSDUs are only supported if we know that they are, and the only practical way for that is HT support which doesn't support old ciphers. However, we would normally accept them anyway. Since we check the MMIC before deaggregating A-MSDUs, and the A-MSDU bit in the QoS header is not protected in TKIP (or WEP), this enables attacks similar to CVE-2020-24588. To prevent that, drop A-MSDUs completely with old ciphers. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210511200110.076543300172.I548e6e71f1ee9cad4b9a37bf212ae7db723587aa@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f14d32a5001dd..8a72d48ad6e05 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -6,7 +6,7 @@ * Copyright 2007-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #include @@ -2739,6 +2739,23 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) if (is_multicast_ether_addr(hdr->addr1)) return RX_DROP_UNUSABLE; + if (rx->key) { + /* + * We should not receive A-MSDUs on pre-HT connections, + * and HT connections cannot use old ciphers. Thus drop + * them, as in those cases we couldn't even have SPP + * A-MSDUs or such. + */ + switch (rx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + case WLAN_CIPHER_SUITE_TKIP: + return RX_DROP_UNUSABLE; + default: + break; + } + } + return __ieee80211_rx_h_amsdu(rx, 0); } -- GitLab From 3a11ce08c45b50d69c891d71760b7c5b92074709 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 May 2021 20:02:47 +0200 Subject: [PATCH 0376/3804] mac80211: add fragment cache to sta_info Prior patches protected against fragmentation cache attacks by coloring keys, but this shows that it can lead to issues when multiple stations use the same sequence number. Add a fragment cache to struct sta_info (in addition to the one in the interface) to separate fragments for different stations properly. This then automatically clear most of the fragment cache when a station disconnects (or reassociates) from an AP, or when client interfaces disconnect from the network, etc. On the way, also fix the comment there since this brings us in line with the recommendation in 802.11-2016 ("An AP should support ..."). Additionally, remove a useless condition (since there's no problem purging an already empty list). Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210511200110.fc35046b0d52.I1ef101e3784d13e8f6600d83de7ec9a3a45bcd52@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 26 ++++-------------------- net/mac80211/iface.c | 11 +++------- net/mac80211/rx.c | 41 ++++++++++++++++++++++++++++---------- net/mac80211/sta_info.c | 6 +++++- net/mac80211/sta_info.h | 32 ++++++++++++++++++++++++++++- 5 files changed, 73 insertions(+), 43 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 874ffe7819e53..4c714375bad0f 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -50,12 +50,6 @@ struct ieee80211_local; #define IEEE80211_ENCRYPT_HEADROOM 8 #define IEEE80211_ENCRYPT_TAILROOM 18 -/* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent - * reception of at least three fragmented frames. This limit can be increased - * by changing this define, at the cost of slower frame reassembly and - * increased memory use (about 2 kB of RAM per entry). */ -#define IEEE80211_FRAGMENT_MAX 4 - /* power level hasn't been configured (or set to automatic) */ #define IEEE80211_UNSET_POWER_LEVEL INT_MIN @@ -88,19 +82,6 @@ extern const u8 ieee80211_ac_to_qos_mask[IEEE80211_NUM_ACS]; #define IEEE80211_MAX_NAN_INSTANCE_ID 255 -struct ieee80211_fragment_entry { - struct sk_buff_head skb_list; - unsigned long first_frag_time; - u16 seq; - u16 extra_len; - u16 last_frag; - u8 rx_queue; - bool check_sequential_pn; /* needed for CCMP/GCMP */ - u8 last_pn[6]; /* PN of the last fragment if CCMP was used */ - unsigned int key_color; -}; - - struct ieee80211_bss { u32 device_ts_beacon, device_ts_presp; @@ -903,9 +884,7 @@ struct ieee80211_sub_if_data { char name[IFNAMSIZ]; - /* Fragment table for host-based reassembly */ - struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; - unsigned int fragment_next; + struct ieee80211_fragment_cache frags; /* TID bitmap for NoAck policy */ u16 noack_map; @@ -2321,4 +2300,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, #define debug_noinline #endif +void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache); +void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache); + #endif /* IEEE80211_I_H */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 7032a2b59249c..2e2f73a4aa734 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -8,7 +8,7 @@ * Copyright 2008, Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (c) 2016 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #include #include @@ -677,16 +677,12 @@ static void ieee80211_set_multicast_list(struct net_device *dev) */ static void ieee80211_teardown_sdata(struct ieee80211_sub_if_data *sdata) { - int i; - /* free extra data */ ieee80211_free_keys(sdata, false); ieee80211_debugfs_remove_netdev(sdata); - for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) - __skb_queue_purge(&sdata->fragments[i].skb_list); - sdata->fragment_next = 0; + ieee80211_destroy_frag_cache(&sdata->frags); if (ieee80211_vif_is_mesh(&sdata->vif)) ieee80211_mesh_teardown_sdata(sdata); @@ -1930,8 +1926,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, sdata->wdev.wiphy = local->hw.wiphy; sdata->local = local; - for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) - skb_queue_head_init(&sdata->fragments[i].skb_list); + ieee80211_init_frag_cache(&sdata->frags); INIT_LIST_HEAD(&sdata->key_list); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8a72d48ad6e05..7212a1bebd0c4 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2123,19 +2123,34 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return result; } +void ieee80211_init_frag_cache(struct ieee80211_fragment_cache *cache) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cache->entries); i++) + skb_queue_head_init(&cache->entries[i].skb_list); +} + +void ieee80211_destroy_frag_cache(struct ieee80211_fragment_cache *cache) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cache->entries); i++) + __skb_queue_purge(&cache->entries[i].skb_list); +} + static inline struct ieee80211_fragment_entry * -ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, +ieee80211_reassemble_add(struct ieee80211_fragment_cache *cache, unsigned int frag, unsigned int seq, int rx_queue, struct sk_buff **skb) { struct ieee80211_fragment_entry *entry; - entry = &sdata->fragments[sdata->fragment_next++]; - if (sdata->fragment_next >= IEEE80211_FRAGMENT_MAX) - sdata->fragment_next = 0; + entry = &cache->entries[cache->next++]; + if (cache->next >= IEEE80211_FRAGMENT_MAX) + cache->next = 0; - if (!skb_queue_empty(&entry->skb_list)) - __skb_queue_purge(&entry->skb_list); + __skb_queue_purge(&entry->skb_list); __skb_queue_tail(&entry->skb_list, *skb); /* no need for locking */ *skb = NULL; @@ -2150,14 +2165,14 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, } static inline struct ieee80211_fragment_entry * -ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, +ieee80211_reassemble_find(struct ieee80211_fragment_cache *cache, unsigned int frag, unsigned int seq, int rx_queue, struct ieee80211_hdr *hdr) { struct ieee80211_fragment_entry *entry; int i, idx; - idx = sdata->fragment_next; + idx = cache->next; for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) { struct ieee80211_hdr *f_hdr; struct sk_buff *f_skb; @@ -2166,7 +2181,7 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, if (idx < 0) idx = IEEE80211_FRAGMENT_MAX - 1; - entry = &sdata->fragments[idx]; + entry = &cache->entries[idx]; if (skb_queue_empty(&entry->skb_list) || entry->seq != seq || entry->rx_queue != rx_queue || entry->last_frag + 1 != frag) @@ -2207,6 +2222,7 @@ static bool requires_sequential_pn(struct ieee80211_rx_data *rx, __le16 fc) static ieee80211_rx_result debug_noinline ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) { + struct ieee80211_fragment_cache *cache = &rx->sdata->frags; struct ieee80211_hdr *hdr; u16 sc; __le16 fc; @@ -2228,6 +2244,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) goto out_no_led; } + if (rx->sta) + cache = &rx->sta->frags; + if (likely(!ieee80211_has_morefrags(fc) && frag == 0)) goto out; @@ -2246,7 +2265,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (frag == 0) { /* This is the first fragment of a new frame. */ - entry = ieee80211_reassemble_add(rx->sdata, frag, seq, + entry = ieee80211_reassemble_add(cache, frag, seq, rx->seqno_idx, &(rx->skb)); if (requires_sequential_pn(rx, fc)) { int queue = rx->security_idx; @@ -2274,7 +2293,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) /* This is a fragment for a frame that should already be pending in * fragment cache. Add this fragment to the end of the pending entry. */ - entry = ieee80211_reassemble_find(rx->sdata, frag, seq, + entry = ieee80211_reassemble_find(cache, frag, seq, rx->seqno_idx, hdr); if (!entry) { I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index ec6973ee88ef4..f2fb69da9b6e1 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ #include @@ -392,6 +392,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, u64_stats_init(&sta->rx_stats.syncp); + ieee80211_init_frag_cache(&sta->frags); + sta->sta_state = IEEE80211_STA_NONE; /* Mark TID as unreserved */ @@ -1102,6 +1104,8 @@ static void __sta_info_destroy_part2(struct sta_info *sta) ieee80211_sta_debugfs_remove(sta); + ieee80211_destroy_frag_cache(&sta->frags); + cleanup_single_sta(sta); } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 78b9d0c7cc583..5c56d29a619e6 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -3,7 +3,7 @@ * Copyright 2002-2005, Devicescape Software, Inc. * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright(c) 2020 Intel Corporation + * Copyright(c) 2020-2021 Intel Corporation */ #ifndef STA_INFO_H @@ -438,6 +438,33 @@ struct ieee80211_sta_rx_stats { u64 msdu[IEEE80211_NUM_TIDS + 1]; }; +/* + * IEEE 802.11-2016 (10.6 "Defragmentation") recommends support for "concurrent + * reception of at least one MSDU per access category per associated STA" + * on APs, or "at least one MSDU per access category" on other interface types. + * + * This limit can be increased by changing this define, at the cost of slower + * frame reassembly and increased memory use while fragments are pending. + */ +#define IEEE80211_FRAGMENT_MAX 4 + +struct ieee80211_fragment_entry { + struct sk_buff_head skb_list; + unsigned long first_frag_time; + u16 seq; + u16 extra_len; + u16 last_frag; + u8 rx_queue; + bool check_sequential_pn; /* needed for CCMP/GCMP */ + u8 last_pn[6]; /* PN of the last fragment if CCMP was used */ + unsigned int key_color; +}; + +struct ieee80211_fragment_cache { + struct ieee80211_fragment_entry entries[IEEE80211_FRAGMENT_MAX]; + unsigned int next; +}; + /* * The bandwidth threshold below which the per-station CoDel parameters will be * scaled to be more lenient (to prevent starvation of slow stations). This @@ -531,6 +558,7 @@ struct ieee80211_sta_rx_stats { * @status_stats.last_ack_signal: last ACK signal * @status_stats.ack_signal_filled: last ACK signal validity * @status_stats.avg_ack_signal: average ACK signal + * @frags: fragment cache */ struct sta_info { /* General information, mostly static */ @@ -639,6 +667,8 @@ struct sta_info { struct cfg80211_chan_def tdls_chandef; + struct ieee80211_fragment_cache frags; + /* keep last! */ struct ieee80211_sta sta; }; -- GitLab From bf30ca922a0c0176007e074b0acc77ed345e9990 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 May 2021 20:02:48 +0200 Subject: [PATCH 0377/3804] mac80211: check defrag PN against current frame As pointed out by Mathy Vanhoef, we implement the RX PN check on fragmented frames incorrectly - we check against the last received PN prior to the new frame, rather than to the one in this frame itself. Prior patches addressed the security issue here, but in order to be able to reason better about the code, fix it to really compare against the current frame's PN, not the last stored one. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210511200110.bfbc340ff071.Id0b690e581da7d03d76df90bb0e3fd55930bc8a0@changeid Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 11 +++++++++-- net/mac80211/rx.c | 5 ++--- net/mac80211/wpa.c | 13 +++++++++---- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4c714375bad0f..214404a558fb6 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -223,8 +223,15 @@ struct ieee80211_rx_data { */ int security_idx; - u32 tkip_iv32; - u16 tkip_iv16; + union { + struct { + u32 iv32; + u16 iv16; + } tkip; + struct { + u8 pn[IEEE80211_CCMP_PN_LEN]; + } ccm_gcm; + }; }; struct ieee80211_csa_settings { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7212a1bebd0c4..b619c47e1d120 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2308,7 +2308,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (entry->check_sequential_pn) { int i; u8 pn[IEEE80211_CCMP_PN_LEN], *rpn; - int queue; if (!requires_sequential_pn(rx, fc)) return RX_DROP_UNUSABLE; @@ -2323,8 +2322,8 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (pn[i]) break; } - queue = rx->security_idx; - rpn = rx->key->u.ccmp.rx_pn[queue]; + + rpn = rx->ccm_gcm.pn; if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN)) return RX_DROP_UNUSABLE; memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN); diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 91bf32af55e9a..bca47fad5a162 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -3,6 +3,7 @@ * Copyright 2002-2004, Instant802 Networks, Inc. * Copyright 2008, Jouni Malinen * Copyright (C) 2016-2017 Intel Deutschland GmbH + * Copyright (C) 2020-2021 Intel Corporation */ #include @@ -167,8 +168,8 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) update_iv: /* update IV in key information to be able to detect replays */ - rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip_iv32; - rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip_iv16; + rx->key->u.tkip.rx[rx->security_idx].iv32 = rx->tkip.iv32; + rx->key->u.tkip.rx[rx->security_idx].iv16 = rx->tkip.iv16; return RX_CONTINUE; @@ -294,8 +295,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) key, skb->data + hdrlen, skb->len - hdrlen, rx->sta->sta.addr, hdr->addr1, hwaccel, rx->security_idx, - &rx->tkip_iv32, - &rx->tkip_iv16); + &rx->tkip.iv32, + &rx->tkip.iv16); if (res != TKIP_DECRYPT_OK) return RX_DROP_UNUSABLE; @@ -553,6 +554,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, } memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN); + if (unlikely(ieee80211_is_frag(hdr))) + memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN); } /* Remove CCMP header and MIC */ @@ -781,6 +784,8 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) } memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN); + if (unlikely(ieee80211_is_frag(hdr))) + memcpy(rx->ccm_gcm.pn, pn, IEEE80211_CCMP_PN_LEN); } /* Remove GCMP header and MIC */ -- GitLab From 7e44a0b597f04e67eee8cdcbe7ee706c6f5de38b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 May 2021 20:02:49 +0200 Subject: [PATCH 0378/3804] mac80211: prevent attacks on TKIP/WEP as well Similar to the issues fixed in previous patches, TKIP and WEP should be protected even if for TKIP we have the Michael MIC protecting it, and WEP is broken anyway. However, this also somewhat protects potential other algorithms that drivers might implement. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210511200110.430e8c202313.Ia37e4e5b6b3eaab1a5ae050e015f6c92859dbe27@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 12 ++++++++++++ net/mac80211/sta_info.h | 3 ++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b619c47e1d120..4454ec47283f8 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2274,6 +2274,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) * next fragment has a sequential PN value. */ entry->check_sequential_pn = true; + entry->is_protected = true; entry->key_color = rx->key->color; memcpy(entry->last_pn, rx->key->u.ccmp.rx_pn[queue], @@ -2286,6 +2287,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) sizeof(rx->key->u.gcmp.rx_pn[queue])); BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN != IEEE80211_GCMP_PN_LEN); + } else if (rx->key && ieee80211_has_protected(fc)) { + entry->is_protected = true; + entry->key_color = rx->key->color; } return RX_QUEUED; } @@ -2327,6 +2331,14 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) if (memcmp(pn, rpn, IEEE80211_CCMP_PN_LEN)) return RX_DROP_UNUSABLE; memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN); + } else if (entry->is_protected && + (!rx->key || !ieee80211_has_protected(fc) || + rx->key->color != entry->key_color)) { + /* Drop this as a mixed key or fragment cache attack, even + * if for TKIP Michael MIC should protect us, and WEP is a + * lost cause anyway. + */ + return RX_DROP_UNUSABLE; } skb_pull(rx->skb, ieee80211_hdrlen(fc)); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 5c56d29a619e6..0333072ebd982 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -455,7 +455,8 @@ struct ieee80211_fragment_entry { u16 extra_len; u16 last_frag; u8 rx_queue; - bool check_sequential_pn; /* needed for CCMP/GCMP */ + u8 check_sequential_pn:1, /* needed for CCMP/GCMP */ + is_protected:1; u8 last_pn[6]; /* PN of the last fragment if CCMP was used */ unsigned int key_color; }; -- GitLab From a8c4d76a8dd4fb9666fc8919a703d85fb8f44ed8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 May 2021 20:02:50 +0200 Subject: [PATCH 0379/3804] mac80211: do not accept/forward invalid EAPOL frames EAPOL frames are used for authentication and key management between the AP and each individual STA associated in the BSS. Those frames are not supposed to be sent by one associated STA to another associated STA (either unicast for broadcast/multicast). Similarly, in 802.11 they're supposed to be sent to the authenticator (AP) address. Since it is possible for unexpected EAPOL frames to result in misbehavior in supplicant implementations, it is better for the AP to not allow such cases to be forwarded to other clients either directly, or indirectly if the AP interface is part of a bridge. Accept EAPOL (control port) frames only if they're transmitted to the own address, or, due to interoperability concerns, to the PAE group address. Disable forwarding of EAPOL (or well, the configured control port protocol) frames back to wireless medium in all cases. Previously, these frames were accepted from fully authenticated and authorized stations and also from unauthenticated stations for one of the cases. Additionally, to avoid forwarding by the bridge, rewrite the PAE group address case to the local MAC address. Cc: stable@vger.kernel.org Co-developed-by: Jouni Malinen Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.cb327ed0cabe.Ib7dcffa2a31f0913d660de65ba3c8aca75b1d10f@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 4454ec47283f8..22a925899a9ec 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2531,13 +2531,13 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc) struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data; /* - * Allow EAPOL frames to us/the PAE group address regardless - * of whether the frame was encrypted or not. + * Allow EAPOL frames to us/the PAE group address regardless of + * whether the frame was encrypted or not, and always disallow + * all other destination addresses for them. */ - if (ehdr->h_proto == rx->sdata->control_port_protocol && - (ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) || - ether_addr_equal(ehdr->h_dest, pae_group_addr))) - return true; + if (unlikely(ehdr->h_proto == rx->sdata->control_port_protocol)) + return ether_addr_equal(ehdr->h_dest, rx->sdata->vif.addr) || + ether_addr_equal(ehdr->h_dest, pae_group_addr); if (ieee80211_802_1x_port_control(rx) || ieee80211_drop_unencrypted(rx, fc)) @@ -2562,8 +2562,28 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb, cfg80211_rx_control_port(dev, skb, noencrypt); dev_kfree_skb(skb); } else { + struct ethhdr *ehdr = (void *)skb_mac_header(skb); + memset(skb->cb, 0, sizeof(skb->cb)); + /* + * 802.1X over 802.11 requires that the authenticator address + * be used for EAPOL frames. However, 802.1X allows the use of + * the PAE group address instead. If the interface is part of + * a bridge and we pass the frame with the PAE group address, + * then the bridge will forward it to the network (even if the + * client was not associated yet), which isn't supposed to + * happen. + * To avoid that, rewrite the destination address to our own + * address, so that the authenticator (e.g. hostapd) will see + * the frame, but bridge won't forward it anywhere else. Note + * that due to earlier filtering, the only other address can + * be the PAE group address. + */ + if (unlikely(skb->protocol == sdata->control_port_protocol && + !ether_addr_equal(ehdr->h_dest, sdata->vif.addr))) + ether_addr_copy(ehdr->h_dest, sdata->vif.addr); + /* deliver to local stack */ if (rx->list) list_add_tail(&skb->list, rx->list); @@ -2603,6 +2623,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) if ((sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && + ehdr->h_proto != rx->sdata->control_port_protocol && (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) { if (is_multicast_ether_addr(ehdr->h_dest) && ieee80211_vif_get_num_mcast_if(sdata) != 0) { -- GitLab From 3edc6b0d6c061a70d8ca3c3c72eb1f58ce29bfb1 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Tue, 11 May 2021 20:02:51 +0200 Subject: [PATCH 0380/3804] mac80211: extend protection against mixed key and fragment cache attacks For some chips/drivers, e.g., QCA6174 with ath10k, the decryption is done by the hardware, and the Protected bit in the Frame Control field is cleared in the lower level driver before the frame is passed to mac80211. In such cases, the condition for ieee80211_has_protected() is not met in ieee80211_rx_h_defragment() of mac80211 and the new security validation steps are not executed. Extend mac80211 to cover the case where the Protected bit has been cleared, but the frame is indicated as having been decrypted by the hardware. This extends protection against mixed key and fragment cache attack for additional drivers/chips. This fixes CVE-2020-24586 and CVE-2020-24587 for such cases. Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1 Cc: stable@vger.kernel.org Signed-off-by: Wen Gong Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.037aa5ca0390.I7bb888e2965a0db02a67075fcb5deb50eb7408aa@changeid Signed-off-by: Johannes Berg --- net/mac80211/rx.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 22a925899a9ec..1bb43edd47b6c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2229,6 +2229,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) unsigned int frag, seq; struct ieee80211_fragment_entry *entry; struct sk_buff *skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); hdr = (struct ieee80211_hdr *)rx->skb->data; fc = hdr->frame_control; @@ -2287,7 +2288,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) sizeof(rx->key->u.gcmp.rx_pn[queue])); BUILD_BUG_ON(IEEE80211_CCMP_PN_LEN != IEEE80211_GCMP_PN_LEN); - } else if (rx->key && ieee80211_has_protected(fc)) { + } else if (rx->key && + (ieee80211_has_protected(fc) || + (status->flag & RX_FLAG_DECRYPTED))) { entry->is_protected = true; entry->key_color = rx->key->color; } @@ -2332,13 +2335,19 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; memcpy(entry->last_pn, pn, IEEE80211_CCMP_PN_LEN); } else if (entry->is_protected && - (!rx->key || !ieee80211_has_protected(fc) || + (!rx->key || + (!ieee80211_has_protected(fc) && + !(status->flag & RX_FLAG_DECRYPTED)) || rx->key->color != entry->key_color)) { /* Drop this as a mixed key or fragment cache attack, even * if for TKIP Michael MIC should protect us, and WEP is a * lost cause anyway. */ return RX_DROP_UNUSABLE; + } else if (entry->is_protected && rx->key && + entry->key_color != rx->key->color && + (status->flag & RX_FLAG_DECRYPTED)) { + return RX_DROP_UNUSABLE; } skb_pull(rx->skb, ieee80211_hdrlen(fc)); -- GitLab From a1166b2653db2f3de7338b9fb8a0f6e924b904ee Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Tue, 11 May 2021 20:02:52 +0200 Subject: [PATCH 0381/3804] ath10k: add CCMP PN replay protection for fragmented frames for PCIe PN replay check for not fragmented frames is finished in the firmware, but this was not done for fragmented frames when ath10k is used with QCA6174/QCA6377 PCIe. mac80211 has the function ieee80211_rx_h_defragment() for PN replay check for fragmented frames, but this does not get checked with QCA6174 due to the ieee80211_has_protected() condition not matching the cleared Protected bit case. Validate the PN of received fragmented frames within ath10k when CCMP is used and drop the fragment if the PN is not correct (incremented by exactly one from the previous fragment). This applies only for QCA6174/QCA6377 PCIe. Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1 Cc: stable@vger.kernel.org Signed-off-by: Wen Gong Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.9ba2664866a4.I756e47b67e210dba69966d989c4711ffc02dc6bc@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/htt.h | 1 + drivers/net/wireless/ath/ath10k/htt_rx.c | 99 +++++++++++++++++++++++- 2 files changed, 96 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h index 956157946106c..dbc8aef82a65f 100644 --- a/drivers/net/wireless/ath/ath10k/htt.h +++ b/drivers/net/wireless/ath/ath10k/htt.h @@ -845,6 +845,7 @@ enum htt_security_types { #define ATH10K_HTT_TXRX_PEER_SECURITY_MAX 2 #define ATH10K_TXRX_NUM_EXT_TIDS 19 +#define ATH10K_TXRX_NON_QOS_TID 16 enum htt_security_flags { #define HTT_SECURITY_TYPE_MASK 0x7F diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 1a08156d5011d..f1e5bce8b14f4 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -1746,16 +1746,87 @@ static void ath10k_htt_rx_h_csum_offload(struct sk_buff *msdu) msdu->ip_summed = ath10k_htt_rx_get_csum_state(msdu); } +static u64 ath10k_htt_rx_h_get_pn(struct ath10k *ar, struct sk_buff *skb, + u16 offset, + enum htt_rx_mpdu_encrypt_type enctype) +{ + struct ieee80211_hdr *hdr; + u64 pn = 0; + u8 *ehdr; + + hdr = (struct ieee80211_hdr *)(skb->data + offset); + ehdr = skb->data + offset + ieee80211_hdrlen(hdr->frame_control); + + if (enctype == HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2) { + pn = ehdr[0]; + pn |= (u64)ehdr[1] << 8; + pn |= (u64)ehdr[4] << 16; + pn |= (u64)ehdr[5] << 24; + pn |= (u64)ehdr[6] << 32; + pn |= (u64)ehdr[7] << 40; + } + return pn; +} + +static bool ath10k_htt_rx_h_frag_pn_check(struct ath10k *ar, + struct sk_buff *skb, + u16 peer_id, + u16 offset, + enum htt_rx_mpdu_encrypt_type enctype) +{ + struct ath10k_peer *peer; + union htt_rx_pn_t *last_pn, new_pn = {0}; + struct ieee80211_hdr *hdr; + bool more_frags; + u8 tid, frag_number; + u32 seq; + + peer = ath10k_peer_find_by_id(ar, peer_id); + if (!peer) { + ath10k_dbg(ar, ATH10K_DBG_HTT, "invalid peer for frag pn check\n"); + return false; + } + + hdr = (struct ieee80211_hdr *)(skb->data + offset); + if (ieee80211_is_data_qos(hdr->frame_control)) + tid = ieee80211_get_tid(hdr); + else + tid = ATH10K_TXRX_NON_QOS_TID; + + last_pn = &peer->frag_tids_last_pn[tid]; + new_pn.pn48 = ath10k_htt_rx_h_get_pn(ar, skb, offset, enctype); + more_frags = ieee80211_has_morefrags(hdr->frame_control); + frag_number = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_FRAG; + seq = (__le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ) >> 4; + + if (frag_number == 0) { + last_pn->pn48 = new_pn.pn48; + peer->frag_tids_seq[tid] = seq; + } else { + if (seq != peer->frag_tids_seq[tid]) + return false; + + if (new_pn.pn48 != last_pn->pn48 + 1) + return false; + + last_pn->pn48 = new_pn.pn48; + } + + return true; +} + static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, struct sk_buff_head *amsdu, struct ieee80211_rx_status *status, bool fill_crypt_header, u8 *rx_hdr, - enum ath10k_pkt_rx_err *err) + enum ath10k_pkt_rx_err *err, + u16 peer_id, + bool frag) { struct sk_buff *first; struct sk_buff *last; - struct sk_buff *msdu; + struct sk_buff *msdu, *temp; struct htt_rx_desc *rxd; struct ieee80211_hdr *hdr; enum htt_rx_mpdu_encrypt_type enctype; @@ -1768,6 +1839,7 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, bool is_decrypted; bool is_mgmt; u32 attention; + bool frag_pn_check = true; if (skb_queue_empty(amsdu)) return; @@ -1866,6 +1938,24 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, } skb_queue_walk(amsdu, msdu) { + if (frag && !fill_crypt_header && is_decrypted && + enctype == HTT_RX_MPDU_ENCRYPT_AES_CCM_WPA2) + frag_pn_check = ath10k_htt_rx_h_frag_pn_check(ar, + msdu, + peer_id, + 0, + enctype); + + if (!frag_pn_check) { + /* Discard the fragment with invalid PN */ + temp = msdu->prev; + __skb_unlink(msdu, amsdu); + dev_kfree_skb_any(msdu); + msdu = temp; + frag_pn_check = true; + continue; + } + ath10k_htt_rx_h_csum_offload(msdu); ath10k_htt_rx_h_undecap(ar, msdu, status, first_hdr, enctype, is_decrypted); @@ -2071,7 +2161,8 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) ath10k_htt_rx_h_unchain(ar, &amsdu, &drop_cnt, &unchain_cnt); ath10k_htt_rx_h_filter(ar, &amsdu, rx_status, &drop_cnt_filter); - ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status, true, first_hdr, &err); + ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status, true, first_hdr, &err, 0, + false); msdus_to_queue = skb_queue_len(&amsdu); ath10k_htt_rx_h_enqueue(ar, &amsdu, rx_status); @@ -3027,7 +3118,7 @@ static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id); ath10k_htt_rx_h_filter(ar, &amsdu, status, NULL); ath10k_htt_rx_h_mpdu(ar, &amsdu, status, false, NULL, - NULL); + NULL, peer_id, frag); ath10k_htt_rx_h_enqueue(ar, &amsdu, status); break; case -EAGAIN: -- GitLab From 65c415a144ad8132b6a6d97d4a1919ffc728e2d1 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Tue, 11 May 2021 20:02:53 +0200 Subject: [PATCH 0382/3804] ath10k: drop fragments with multicast DA for PCIe Fragmentation is not used with multicast frames. Discard unexpected fragments with multicast DA. This fixes CVE-2020-26145. Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1 Cc: stable@vger.kernel.org Signed-off-by: Wen Gong Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.5a0bd289bda8.Idd6ebea20038fb1cfee6de924aa595e5647c9eae@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/htt_rx.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index f1e5bce8b14f4..cb04848ed5cb6 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -1768,6 +1768,16 @@ static u64 ath10k_htt_rx_h_get_pn(struct ath10k *ar, struct sk_buff *skb, return pn; } +static bool ath10k_htt_rx_h_frag_multicast_check(struct ath10k *ar, + struct sk_buff *skb, + u16 offset) +{ + struct ieee80211_hdr *hdr; + + hdr = (struct ieee80211_hdr *)(skb->data + offset); + return !is_multicast_ether_addr(hdr->addr1); +} + static bool ath10k_htt_rx_h_frag_pn_check(struct ath10k *ar, struct sk_buff *skb, u16 peer_id, @@ -1839,7 +1849,7 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, bool is_decrypted; bool is_mgmt; u32 attention; - bool frag_pn_check = true; + bool frag_pn_check = true, multicast_check = true; if (skb_queue_empty(amsdu)) return; @@ -1946,13 +1956,20 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, 0, enctype); - if (!frag_pn_check) { - /* Discard the fragment with invalid PN */ + if (frag) + multicast_check = ath10k_htt_rx_h_frag_multicast_check(ar, + msdu, + 0); + + if (!frag_pn_check || !multicast_check) { + /* Discard the fragment with invalid PN or multicast DA + */ temp = msdu->prev; __skb_unlink(msdu, amsdu); dev_kfree_skb_any(msdu); msdu = temp; frag_pn_check = true; + multicast_check = true; continue; } -- GitLab From 40e7462dad6f3d06efdb17d26539e61ab6e34db1 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Tue, 11 May 2021 20:02:54 +0200 Subject: [PATCH 0383/3804] ath10k: drop fragments with multicast DA for SDIO Fragmentation is not used with multicast frames. Discard unexpected fragments with multicast DA. This fixes CVE-2020-26145. Tested-on: QCA6174 hw3.2 SDIO WLAN.RMH.4.4.1-00049 Cc: stable@vger.kernel.org Signed-off-by: Wen Gong Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.9ca6ca7945a9.I1e18b514590af17c155bda86699bc3a971a8dcf4@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/htt_rx.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index cb04848ed5cb6..b1d93ff5215a6 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -2617,6 +2617,13 @@ static bool ath10k_htt_rx_proc_rx_frag_ind_hl(struct ath10k_htt *htt, rx_desc = (struct htt_hl_rx_desc *)(skb->data + tot_hdr_len); rx_desc_info = __le32_to_cpu(rx_desc->info); + hdr = (struct ieee80211_hdr *)((u8 *)rx_desc + rx_hl->fw_desc.len); + + if (is_multicast_ether_addr(hdr->addr1)) { + /* Discard the fragment with multicast DA */ + goto err; + } + if (!MS(rx_desc_info, HTT_RX_DESC_HL_INFO_ENCRYPTED)) { spin_unlock_bh(&ar->data_lock); return ath10k_htt_rx_proc_rx_ind_hl(htt, &resp->rx_ind_hl, skb, @@ -2624,8 +2631,6 @@ static bool ath10k_htt_rx_proc_rx_frag_ind_hl(struct ath10k_htt *htt, HTT_RX_NON_TKIP_MIC); } - hdr = (struct ieee80211_hdr *)((u8 *)rx_desc + rx_hl->fw_desc.len); - if (ieee80211_has_retry(hdr->frame_control)) goto err; -- GitLab From 079a108feba474b4b32bd3471db03e11f2f83b81 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Tue, 11 May 2021 20:02:55 +0200 Subject: [PATCH 0384/3804] ath10k: drop MPDU which has discard flag set by firmware for SDIO When the discard flag is set by the firmware for an MPDU, it should be dropped. This allows a mitigation for CVE-2020-24588 to be implemented in the firmware. Tested-on: QCA6174 hw3.2 SDIO WLAN.RMH.4.4.1-00049 Cc: stable@vger.kernel.org Signed-off-by: Wen Gong Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.11968c725b5c.Idd166365ebea2771c0c0a38c78b5060750f90e17@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/htt_rx.c | 5 +++++ drivers/net/wireless/ath/ath10k/rx_desc.h | 14 +++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index b1d93ff5215a6..12451ab66a191 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -2312,6 +2312,11 @@ static bool ath10k_htt_rx_proc_rx_ind_hl(struct ath10k_htt *htt, fw_desc = &rx->fw_desc; rx_desc_len = fw_desc->len; + if (fw_desc->u.bits.discard) { + ath10k_dbg(ar, ATH10K_DBG_HTT, "htt discard mpdu\n"); + goto err; + } + /* I have not yet seen any case where num_mpdu_ranges > 1. * qcacld does not seem handle that case either, so we introduce the * same limitiation here as well. diff --git a/drivers/net/wireless/ath/ath10k/rx_desc.h b/drivers/net/wireless/ath/ath10k/rx_desc.h index f2b6bf8f0d60d..705b6295e4663 100644 --- a/drivers/net/wireless/ath/ath10k/rx_desc.h +++ b/drivers/net/wireless/ath/ath10k/rx_desc.h @@ -1282,7 +1282,19 @@ struct fw_rx_desc_base { #define FW_RX_DESC_UDP (1 << 6) struct fw_rx_desc_hl { - u8 info0; + union { + struct { + u8 discard:1, + forward:1, + any_err:1, + dup_err:1, + reserved:1, + inspect:1, + extension:2; + } bits; + u8 info0; + } u; + u8 version; u8 len; u8 flags; -- GitLab From 0dc267b13f3a7e8424a898815dd357211b737330 Mon Sep 17 00:00:00 2001 From: Wen Gong Date: Tue, 11 May 2021 20:02:56 +0200 Subject: [PATCH 0385/3804] ath10k: Fix TKIP Michael MIC verification for PCIe TKIP Michael MIC was not verified properly for PCIe cases since the validation steps in ieee80211_rx_h_michael_mic_verify() in mac80211 did not get fully executed due to unexpected flag values in ieee80211_rx_status. Fix this by setting the flags property to meet mac80211 expectations for performing Michael MIC validation there. This fixes CVE-2020-26141. It does the same as ath10k_htt_rx_proc_rx_ind_hl() for SDIO which passed MIC verification case. This applies only to QCA6174/QCA9377 PCIe. Tested-on: QCA6174 hw3.2 PCI WLAN.RM.4.4.1-00110-QCARMSWP-1 Cc: stable@vger.kernel.org Signed-off-by: Wen Gong Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.c3f1d42c6746.I795593fcaae941c471425b8c7d5f7bb185d29142@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/htt_rx.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 12451ab66a191..87196f9bbdea3 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -1974,6 +1974,11 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, } ath10k_htt_rx_h_csum_offload(msdu); + + if (frag && !fill_crypt_header && + enctype == HTT_RX_MPDU_ENCRYPT_TKIP_WPA) + status->flag &= ~RX_FLAG_MMIC_STRIPPED; + ath10k_htt_rx_h_undecap(ar, msdu, status, first_hdr, enctype, is_decrypted); @@ -1991,6 +1996,11 @@ static void ath10k_htt_rx_h_mpdu(struct ath10k *ar, hdr = (void *)msdu->data; hdr->frame_control &= ~__cpu_to_le16(IEEE80211_FCTL_PROTECTED); + + if (frag && !fill_crypt_header && + enctype == HTT_RX_MPDU_ENCRYPT_TKIP_WPA) + status->flag &= ~RX_FLAG_IV_STRIPPED & + ~RX_FLAG_MMIC_STRIPPED; } } -- GitLab From 62a8ff67eba52dae9b107e1fb8827054ed00a265 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Tue, 11 May 2021 20:02:57 +0200 Subject: [PATCH 0386/3804] ath10k: Validate first subframe of A-MSDU before processing the list In certain scenarios a normal MSDU can be received as an A-MSDU when the A-MSDU present bit of a QoS header gets flipped during reception. Since this bit is unauthenticated, the hardware crypto engine can pass the frame to the driver without any error indication. This could result in processing unintended subframes collected in the A-MSDU list. Hence, validate A-MSDU list by checking if the first frame has a valid subframe header. Comparing the non-aggregated MSDU and an A-MSDU, the fields of the first subframe DA matches the LLC/SNAP header fields of a normal MSDU. In order to avoid processing such frames, add a validation to filter such A-MSDU frames where the first subframe header DA matches with the LLC/SNAP header pattern. Tested-on: QCA9984 hw1.0 PCI 10.4-3.10-00047 Cc: stable@vger.kernel.org Signed-off-by: Sriram R Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.e6f5eb7b9847.I38a77ae26096862527a5eab73caebd7346af8b66@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/htt_rx.c | 61 ++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 87196f9bbdea3..7ffb5d5b2a70e 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -2108,14 +2108,62 @@ static void ath10k_htt_rx_h_unchain(struct ath10k *ar, ath10k_unchain_msdu(amsdu, unchain_cnt); } +static bool ath10k_htt_rx_validate_amsdu(struct ath10k *ar, + struct sk_buff_head *amsdu) +{ + u8 *subframe_hdr; + struct sk_buff *first; + bool is_first, is_last; + struct htt_rx_desc *rxd; + struct ieee80211_hdr *hdr; + size_t hdr_len, crypto_len; + enum htt_rx_mpdu_encrypt_type enctype; + int bytes_aligned = ar->hw_params.decap_align_bytes; + + first = skb_peek(amsdu); + + rxd = (void *)first->data - sizeof(*rxd); + hdr = (void *)rxd->rx_hdr_status; + + is_first = !!(rxd->msdu_end.common.info0 & + __cpu_to_le32(RX_MSDU_END_INFO0_FIRST_MSDU)); + is_last = !!(rxd->msdu_end.common.info0 & + __cpu_to_le32(RX_MSDU_END_INFO0_LAST_MSDU)); + + /* Return in case of non-aggregated msdu */ + if (is_first && is_last) + return true; + + /* First msdu flag is not set for the first msdu of the list */ + if (!is_first) + return false; + + enctype = MS(__le32_to_cpu(rxd->mpdu_start.info0), + RX_MPDU_START_INFO0_ENCRYPT_TYPE); + + hdr_len = ieee80211_hdrlen(hdr->frame_control); + crypto_len = ath10k_htt_rx_crypto_param_len(ar, enctype); + + subframe_hdr = (u8 *)hdr + round_up(hdr_len, bytes_aligned) + + crypto_len; + + /* Validate if the amsdu has a proper first subframe. + * There are chances a single msdu can be received as amsdu when + * the unauthenticated amsdu flag of a QoS header + * gets flipped in non-SPP AMSDU's, in such cases the first + * subframe has llc/snap header in place of a valid da. + * return false if the da matches rfc1042 pattern + */ + if (ether_addr_equal(subframe_hdr, rfc1042_header)) + return false; + + return true; +} + static bool ath10k_htt_rx_amsdu_allowed(struct ath10k *ar, struct sk_buff_head *amsdu, struct ieee80211_rx_status *rx_status) { - /* FIXME: It might be a good idea to do some fuzzy-testing to drop - * invalid/dangerous frames. - */ - if (!rx_status->freq) { ath10k_dbg(ar, ATH10K_DBG_HTT, "no channel configured; ignoring frame(s)!\n"); return false; @@ -2126,6 +2174,11 @@ static bool ath10k_htt_rx_amsdu_allowed(struct ath10k *ar, return false; } + if (!ath10k_htt_rx_validate_amsdu(ar, amsdu)) { + ath10k_dbg(ar, ATH10K_DBG_HTT, "invalid amsdu received\n"); + return false; + } + return true; } -- GitLab From c3944a5621026c176001493d48ee66ff94e1a39a Mon Sep 17 00:00:00 2001 From: Sriram R Date: Tue, 11 May 2021 20:02:58 +0200 Subject: [PATCH 0387/3804] ath11k: Clear the fragment cache during key install Currently the fragment cache setup during peer assoc is cleared only during peer delete. In case a key reinstallation happens with the same peer, the same fragment cache with old fragments added before key installation could be clubbed with fragments received after. This might be exploited to mix fragments of different data resulting in a proper unintended reassembled packet to be passed up the stack. Hence flush the fragment cache on every key installation to prevent potential attacks (CVE-2020-24587). Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01734-QCAHKSWPL_SILICONZ-1 v2 Cc: stable@vger.kernel.org Signed-off-by: Sriram R Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.218dc777836f.I9af6fc76215a35936c4152552018afb5079c5d8c@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/dp_rx.c | 18 ++++++++++++++++++ drivers/net/wireless/ath/ath11k/dp_rx.h | 1 + drivers/net/wireless/ath/ath11k/mac.c | 6 ++++++ 3 files changed, 25 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 1d9aa1bb6b6e9..3382f8bfcb48d 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -852,6 +852,24 @@ static void ath11k_dp_rx_frags_cleanup(struct dp_rx_tid *rx_tid, bool rel_link_d __skb_queue_purge(&rx_tid->rx_frags); } +void ath11k_peer_frags_flush(struct ath11k *ar, struct ath11k_peer *peer) +{ + struct dp_rx_tid *rx_tid; + int i; + + lockdep_assert_held(&ar->ab->base_lock); + + for (i = 0; i <= IEEE80211_NUM_TIDS; i++) { + rx_tid = &peer->rx_tid[i]; + + spin_unlock_bh(&ar->ab->base_lock); + del_timer_sync(&rx_tid->frag_timer); + spin_lock_bh(&ar->ab->base_lock); + + ath11k_dp_rx_frags_cleanup(rx_tid, true); + } +} + void ath11k_peer_rx_tid_cleanup(struct ath11k *ar, struct ath11k_peer *peer) { struct dp_rx_tid *rx_tid; diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.h b/drivers/net/wireless/ath/ath11k/dp_rx.h index bf399312b5ff5..623da3bf9dc81 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.h +++ b/drivers/net/wireless/ath/ath11k/dp_rx.h @@ -49,6 +49,7 @@ int ath11k_dp_peer_rx_pn_replay_config(struct ath11k_vif *arvif, const u8 *peer_addr, enum set_key_cmd key_cmd, struct ieee80211_key_conf *key); +void ath11k_peer_frags_flush(struct ath11k *ar, struct ath11k_peer *peer); void ath11k_peer_rx_tid_cleanup(struct ath11k *ar, struct ath11k_peer *peer); void ath11k_peer_rx_tid_delete(struct ath11k *ar, struct ath11k_peer *peer, u8 tid); diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 4df425dd31a26..9d0ff150ec30f 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -2779,6 +2779,12 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, */ spin_lock_bh(&ab->base_lock); peer = ath11k_peer_find(ab, arvif->vdev_id, peer_addr); + + /* flush the fragments cache during key (re)install to + * ensure all frags in the new frag list belong to the same key. + */ + if (peer && cmd == SET_KEY) + ath11k_peer_frags_flush(ar, peer); spin_unlock_bh(&ab->base_lock); if (!peer) { -- GitLab From 210f563b097997ce917e82feab356b298bfd12b0 Mon Sep 17 00:00:00 2001 From: Sriram R Date: Tue, 11 May 2021 20:02:59 +0200 Subject: [PATCH 0388/3804] ath11k: Drop multicast fragments Fragmentation is used only with unicast frames. Drop multicast fragments to avoid any undesired behavior. Tested-on: IPQ8074 hw2.0 AHB WLAN.HK.2.4.0.1-01734-QCAHKSWPL_SILICONZ-1 v2 Cc: stable@vger.kernel.org Signed-off-by: Sriram R Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20210511200110.1d53bfd20a8b.Ibb63283051bb5e2c45951932c6e1f351d5a73dc3@changeid Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath11k/dp_rx.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 3382f8bfcb48d..603d2f93ac18f 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -260,6 +260,16 @@ static void ath11k_dp_rxdesc_set_msdu_len(struct ath11k_base *ab, ab->hw_params.hw_ops->rx_desc_set_msdu_len(desc, len); } +static bool ath11k_dp_rx_h_attn_is_mcbc(struct ath11k_base *ab, + struct hal_rx_desc *desc) +{ + struct rx_attention *attn = ath11k_dp_rx_get_attention(ab, desc); + + return ath11k_dp_rx_h_msdu_end_first_msdu(ab, desc) && + (!!FIELD_GET(RX_ATTENTION_INFO1_MCAST_BCAST, + __le32_to_cpu(attn->info1))); +} + static void ath11k_dp_service_mon_ring(struct timer_list *t) { struct ath11k_base *ab = from_timer(ab, t, mon_reap_timer); @@ -3468,6 +3478,7 @@ static int ath11k_dp_rx_frag_h_mpdu(struct ath11k *ar, u8 tid; int ret = 0; bool more_frags; + bool is_mcbc; rx_desc = (struct hal_rx_desc *)msdu->data; peer_id = ath11k_dp_rx_h_mpdu_start_peer_id(ar->ab, rx_desc); @@ -3475,6 +3486,11 @@ static int ath11k_dp_rx_frag_h_mpdu(struct ath11k *ar, seqno = ath11k_dp_rx_h_mpdu_start_seq_no(ar->ab, rx_desc); frag_no = ath11k_dp_rx_h_mpdu_start_frag_no(ar->ab, msdu); more_frags = ath11k_dp_rx_h_mpdu_start_more_frags(ar->ab, msdu); + is_mcbc = ath11k_dp_rx_h_attn_is_mcbc(ar->ab, rx_desc); + + /* Multicast/Broadcast fragments are not expected */ + if (is_mcbc) + return -EINVAL; if (!ath11k_dp_rx_h_mpdu_start_seq_ctrl_valid(ar->ab, rx_desc) || !ath11k_dp_rx_h_mpdu_start_fc_valid(ar->ab, rx_desc) || -- GitLab From 875d598db60ac81e768fdfd2c589f6209038488b Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 11 May 2021 18:34:13 +0200 Subject: [PATCH 0389/3804] MAINTAINERS: Update address for Emma Anholt Reviewed-by: Emma Anholt Signed-off-by: Daniel Vetter --- MAINTAINERS | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index bd7aff0c120f2..38a1e3bf5af08 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5639,7 +5639,7 @@ T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/sun4i/sun8i* DRM DRIVER FOR ARM PL111 CLCD -M: Eric Anholt +M: Emma Anholt S: Supported T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/pl111/ @@ -5719,7 +5719,7 @@ T: git git://anongit.freedesktop.org/drm/drm-misc F: drivers/gpu/drm/tiny/gm12u320.c DRM DRIVER FOR HX8357D PANELS -M: Eric Anholt +M: Emma Anholt S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc F: Documentation/devicetree/bindings/display/himax,hx8357d.txt @@ -6177,7 +6177,7 @@ F: Documentation/devicetree/bindings/display/ti/ F: drivers/gpu/drm/omapdrm/ DRM DRIVERS FOR V3D -M: Eric Anholt +M: Emma Anholt S: Supported T: git git://anongit.freedesktop.org/drm/drm-misc F: Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml @@ -6185,7 +6185,7 @@ F: drivers/gpu/drm/v3d/ F: include/uapi/drm/v3d_drm.h DRM DRIVERS FOR VC4 -M: Eric Anholt +M: Emma Anholt M: Maxime Ripard S: Supported T: git git://github.com/anholt/linux -- GitLab From e09784a8a751e539dffc94d43bc917b0ac1e934a Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 11 May 2021 03:45:16 +0200 Subject: [PATCH 0390/3804] alarmtimer: Check RTC features instead of ops RTC drivers used to leave .set_alarm() NULL in order to signal the RTC device doesn't support alarms. The drivers are now clearing the RTC_FEATURE_ALARM bit for that purpose in order to keep the rtc_class_ops structure const. So now, .set_alarm() is set unconditionally and this possibly causes the alarmtimer code to select an RTC device that doesn't support alarms. Test RTC_FEATURE_ALARM instead of relying on ops->set_alarm to determine whether alarms are available. Fixes: 7ae41220ef58 ("rtc: introduce features bitfield") Signed-off-by: Alexandre Belloni Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20210511014516.563031-1-alexandre.belloni@bootlin.com --- kernel/time/alarmtimer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index bea9d08b16988..5897828b9d7ed 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -92,7 +92,7 @@ static int alarmtimer_rtc_add_device(struct device *dev, if (rtcdev) return -EBUSY; - if (!rtc->ops->set_alarm) + if (!test_bit(RTC_FEATURE_ALARM, rtc->features)) return -1; if (!device_may_wakeup(rtc->dev.parent)) return -1; -- GitLab From 0bd50826a40e012a35c58ed3576b3873643e7a7d Mon Sep 17 00:00:00 2001 From: Wan Jiabing Date: Thu, 6 May 2021 15:08:24 +0800 Subject: [PATCH 0391/3804] leds: Fix reference file name of documentation In commit 56b01acc1c79a ("dt-bindings: gpio: fairchild,74hc595: Convert to json-schema"), gpio-74x164.txt was deleted and replaced by fairchild,74hc595.yaml. Fix the reference file name. Signed-off-by: Wan Jiabing Acked-by: Pavel Machek Link: https://lore.kernel.org/r/20210506070824.10965-1-wanjiabing@vivo.com Signed-off-by: Rob Herring --- Documentation/devicetree/bindings/leds/leds-bcm6328.txt | 4 ++-- Documentation/devicetree/bindings/leds/leds-bcm6358.txt | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/leds/leds-bcm6328.txt b/Documentation/devicetree/bindings/leds/leds-bcm6328.txt index ccebce597f372..a555d94084b7f 100644 --- a/Documentation/devicetree/bindings/leds/leds-bcm6328.txt +++ b/Documentation/devicetree/bindings/leds/leds-bcm6328.txt @@ -4,8 +4,8 @@ This controller is present on BCM6318, BCM6328, BCM6362 and BCM63268. In these SoCs it's possible to control LEDs both as GPIOs or by hardware. However, on some devices there are Serial LEDs (LEDs connected to a 74x164 controller), which can either be controlled by software (exporting the 74x164 -as spi-gpio. See Documentation/devicetree/bindings/gpio/gpio-74x164.txt), or -by hardware using this driver. +as spi-gpio. See Documentation/devicetree/bindings/gpio/fairchild,74hc595.yaml), +or by hardware using this driver. Some of these Serial LEDs are hardware controlled (e.g. ethernet LEDs) and exporting the 74x164 as spi-gpio prevents those LEDs to be hardware controlled, so the only chance to keep them working is by using this driver. diff --git a/Documentation/devicetree/bindings/leds/leds-bcm6358.txt b/Documentation/devicetree/bindings/leds/leds-bcm6358.txt index da5708e7b43b9..6e51c6b91ee54 100644 --- a/Documentation/devicetree/bindings/leds/leds-bcm6358.txt +++ b/Documentation/devicetree/bindings/leds/leds-bcm6358.txt @@ -3,7 +3,7 @@ LEDs connected to Broadcom BCM6358 controller This controller is present on BCM6358 and BCM6368. In these SoCs there are Serial LEDs (LEDs connected to a 74x164 controller), which can either be controlled by software (exporting the 74x164 as spi-gpio. -See Documentation/devicetree/bindings/gpio/gpio-74x164.txt), or +See Documentation/devicetree/bindings/gpio/fairchild,74hc595.yaml), or by hardware using this driver. Required properties: -- GitLab From 67f29896fdc83298eed5a6576ff8f9873f709228 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Tue, 11 May 2021 10:26:03 +0300 Subject: [PATCH 0392/3804] RDMA/rxe: Clear all QP fields if creation failed rxe_qp_do_cleanup() relies on valid pointer values in QP for the properly created ones, but in case rxe_qp_from_init() failed it was filled with garbage and caused tot the following error. refcount_t: underflow; use-after-free. WARNING: CPU: 1 PID: 12560 at lib/refcount.c:28 refcount_warn_saturate+0x1d1/0x1e0 lib/refcount.c:28 Modules linked in: CPU: 1 PID: 12560 Comm: syz-executor.4 Not tainted 5.12.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:refcount_warn_saturate+0x1d1/0x1e0 lib/refcount.c:28 Code: e9 db fe ff ff 48 89 df e8 2c c2 ea fd e9 8a fe ff ff e8 72 6a a7 fd 48 c7 c7 e0 b2 c1 89 c6 05 dc 3a e6 09 01 e8 ee 74 fb 04 <0f> 0b e9 af fe ff ff 0f 1f 84 00 00 00 00 00 41 56 41 55 41 54 55 RSP: 0018:ffffc900097ceba8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000040000 RSI: ffffffff815bb075 RDI: fffff520012f9d67 RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff815b4eae R11: 0000000000000000 R12: ffff8880322a4800 R13: ffff8880322a4940 R14: ffff888033044e00 R15: 0000000000000000 FS: 00007f6eb2be3700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fdbe5d41000 CR3: 000000001d181000 CR4: 00000000001506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __refcount_sub_and_test include/linux/refcount.h:283 [inline] __refcount_dec_and_test include/linux/refcount.h:315 [inline] refcount_dec_and_test include/linux/refcount.h:333 [inline] kref_put include/linux/kref.h:64 [inline] rxe_qp_do_cleanup+0x96f/0xaf0 drivers/infiniband/sw/rxe/rxe_qp.c:805 execute_in_process_context+0x37/0x150 kernel/workqueue.c:3327 rxe_elem_release+0x9f/0x180 drivers/infiniband/sw/rxe/rxe_pool.c:391 kref_put include/linux/kref.h:65 [inline] rxe_create_qp+0x2cd/0x310 drivers/infiniband/sw/rxe/rxe_verbs.c:425 _ib_create_qp drivers/infiniband/core/core_priv.h:331 [inline] ib_create_named_qp+0x2ad/0x1370 drivers/infiniband/core/verbs.c:1231 ib_create_qp include/rdma/ib_verbs.h:3644 [inline] create_mad_qp+0x177/0x2d0 drivers/infiniband/core/mad.c:2920 ib_mad_port_open drivers/infiniband/core/mad.c:3001 [inline] ib_mad_init_device+0xd6f/0x1400 drivers/infiniband/core/mad.c:3092 add_client_context+0x405/0x5e0 drivers/infiniband/core/device.c:717 enable_device_and_get+0x1cd/0x3b0 drivers/infiniband/core/device.c:1331 ib_register_device drivers/infiniband/core/device.c:1413 [inline] ib_register_device+0x7c7/0xa50 drivers/infiniband/core/device.c:1365 rxe_register_device+0x3d5/0x4a0 drivers/infiniband/sw/rxe/rxe_verbs.c:1147 rxe_add+0x12fe/0x16d0 drivers/infiniband/sw/rxe/rxe.c:247 rxe_net_add+0x8c/0xe0 drivers/infiniband/sw/rxe/rxe_net.c:503 rxe_newlink drivers/infiniband/sw/rxe/rxe.c:269 [inline] rxe_newlink+0xb7/0xe0 drivers/infiniband/sw/rxe/rxe.c:250 nldev_newlink+0x30e/0x550 drivers/infiniband/core/nldev.c:1555 rdma_nl_rcv_msg+0x36d/0x690 drivers/infiniband/core/netlink.c:195 rdma_nl_rcv_skb drivers/infiniband/core/netlink.c:239 [inline] rdma_nl_rcv+0x2ee/0x430 drivers/infiniband/core/netlink.c:259 netlink_unicast_kernel net/netlink/af_netlink.c:1312 [inline] netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1338 netlink_sendmsg+0x856/0xd90 net/netlink/af_netlink.c:1927 sock_sendmsg_nosec net/socket.c:654 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:674 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2350 ___sys_sendmsg+0xf3/0x170 net/socket.c:2404 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2433 do_syscall_64+0x3a/0xb0 arch/x86/entry/common.c:47 entry_SYSCALL_64_after_hwframe+0x44/0xae Fixes: 8700e3e7c485 ("Soft RoCE driver") Link: https://lore.kernel.org/r/7bf8d548764d406dbbbaf4b574960ebfd5af8387.1620717918.git.leonro@nvidia.com Reported-by: syzbot+36a7f280de4e11c6f04e@syzkaller.appspotmail.com Signed-off-by: Leon Romanovsky Reviewed-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_qp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 34ae957a315ca..b0f350d674fdb 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -242,6 +242,7 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, if (err) { vfree(qp->sq.queue->buf); kfree(qp->sq.queue); + qp->sq.queue = NULL; return err; } @@ -295,6 +296,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, if (err) { vfree(qp->rq.queue->buf); kfree(qp->rq.queue); + qp->rq.queue = NULL; return err; } } @@ -355,6 +357,11 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, err2: rxe_queue_cleanup(qp->sq.queue); err1: + qp->pd = NULL; + qp->rcq = NULL; + qp->scq = NULL; + qp->srq = NULL; + if (srq) rxe_drop_ref(srq); rxe_drop_ref(scq); -- GitLab From b24abcff918a5cbf44b0c982bd3477a93e8e4911 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 11 May 2021 22:35:16 +0200 Subject: [PATCH 0393/3804] bpf, kconfig: Add consolidated menu entry for bpf with core options Right now, all core BPF related options are scattered in different Kconfig locations mainly due to historic reasons. Moving forward, lets add a proper subsystem entry under ... General setup ---> BPF subsystem ---> ... in order to have all knobs in a single location and thus ease BPF related configuration. Networking related bits such as sockmap are out of scope for the general setup and therefore better suited to remain in net/Kconfig. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/f23f58765a4d59244ebd8037da7b6a6b2fb58446.1620765074.git.daniel@iogearbox.net --- init/Kconfig | 41 +----------------------- kernel/bpf/Kconfig | 78 ++++++++++++++++++++++++++++++++++++++++++++++ net/Kconfig | 27 ---------------- 3 files changed, 79 insertions(+), 67 deletions(-) create mode 100644 kernel/bpf/Kconfig diff --git a/init/Kconfig b/init/Kconfig index ca559ccdaa324..2282a6842dc68 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -439,6 +439,7 @@ config AUDITSYSCALL source "kernel/irq/Kconfig" source "kernel/time/Kconfig" +source "kernel/bpf/Kconfig" source "kernel/Kconfig.preempt" menu "CPU/Task time and stats accounting" @@ -1705,46 +1706,6 @@ config KALLSYMS_BASE_RELATIVE # syscall, maps, verifier -config BPF_LSM - bool "LSM Instrumentation with BPF" - depends on BPF_EVENTS - depends on BPF_SYSCALL - depends on SECURITY - depends on BPF_JIT - help - Enables instrumentation of the security hooks with eBPF programs for - implementing dynamic MAC and Audit Policies. - - If you are unsure how to answer this question, answer N. - -config BPF_SYSCALL - bool "Enable bpf() system call" - select BPF - select IRQ_WORK - select TASKS_TRACE_RCU - select BINARY_PRINTF - select NET_SOCK_MSG if INET - default n - help - Enable the bpf() system call that allows to manipulate eBPF - programs and maps via file descriptors. - -config ARCH_WANT_DEFAULT_BPF_JIT - bool - -config BPF_JIT_ALWAYS_ON - bool "Permanently enable BPF JIT and remove BPF interpreter" - depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT - help - Enables BPF JIT and removes BPF interpreter to avoid - speculative execution of BPF instructions by the interpreter - -config BPF_JIT_DEFAULT_ON - def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON - depends on HAVE_EBPF_JIT && BPF_JIT - -source "kernel/bpf/preload/Kconfig" - config USERFAULTFD bool "Enable userfaultfd() system call" depends on MMU diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig new file mode 100644 index 0000000000000..b4edaefc62555 --- /dev/null +++ b/kernel/bpf/Kconfig @@ -0,0 +1,78 @@ +# SPDX-License-Identifier: GPL-2.0-only + +# BPF interpreter that, for example, classic socket filters depend on. +config BPF + bool + +# Used by archs to tell that they support BPF JIT compiler plus which +# flavour. Only one of the two can be selected for a specific arch since +# eBPF JIT supersedes the cBPF JIT. + +# Classic BPF JIT (cBPF) +config HAVE_CBPF_JIT + bool + +# Extended BPF JIT (eBPF) +config HAVE_EBPF_JIT + bool + +# Used by archs to tell that they want the BPF JIT compiler enabled by +# default for kernels that were compiled with BPF JIT support. +config ARCH_WANT_DEFAULT_BPF_JIT + bool + +menu "BPF subsystem" + +config BPF_SYSCALL + bool "Enable bpf() system call" + select BPF + select IRQ_WORK + select TASKS_TRACE_RCU + select BINARY_PRINTF + select NET_SOCK_MSG if INET + default n + help + Enable the bpf() system call that allows to manipulate BPF programs + and maps via file descriptors. + +config BPF_JIT + bool "Enable BPF Just In Time compiler" + depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT + depends on MODULES + help + BPF programs are normally handled by a BPF interpreter. This option + allows the kernel to generate native code when a program is loaded + into the kernel. This will significantly speed-up processing of BPF + programs. + + Note, an admin should enable this feature changing: + /proc/sys/net/core/bpf_jit_enable + /proc/sys/net/core/bpf_jit_harden (optional) + /proc/sys/net/core/bpf_jit_kallsyms (optional) + +config BPF_JIT_ALWAYS_ON + bool "Permanently enable BPF JIT and remove BPF interpreter" + depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT + help + Enables BPF JIT and removes BPF interpreter to avoid speculative + execution of BPF instructions by the interpreter. + +config BPF_JIT_DEFAULT_ON + def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON + depends on HAVE_EBPF_JIT && BPF_JIT + +source "kernel/bpf/preload/Kconfig" + +config BPF_LSM + bool "Enable BPF LSM Instrumentation" + depends on BPF_EVENTS + depends on BPF_SYSCALL + depends on SECURITY + depends on BPF_JIT + help + Enables instrumentation of the security hooks with BPF programs for + implementing dynamic MAC and Audit Policies. + + If you are unsure how to answer this question, answer N. + +endmenu # "BPF subsystem" diff --git a/net/Kconfig b/net/Kconfig index f5ee7c65e6b4b..c7392c449b254 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -302,21 +302,6 @@ config BQL select DQL default y -config BPF_JIT - bool "enable BPF Just In Time compiler" - depends on HAVE_CBPF_JIT || HAVE_EBPF_JIT - depends on MODULES - help - Berkeley Packet Filter filtering capabilities are normally handled - by an interpreter. This option allows kernel to generate a native - code when filter is loaded in memory. This should speedup - packet sniffing (libpcap/tcpdump). - - Note, admin should enable this feature changing: - /proc/sys/net/core/bpf_jit_enable - /proc/sys/net/core/bpf_jit_harden (optional) - /proc/sys/net/core/bpf_jit_kallsyms (optional) - config BPF_STREAM_PARSER bool "enable BPF STREAM_PARSER" depends on INET @@ -470,15 +455,3 @@ config ETHTOOL_NETLINK e.g. notification messages. endif # if NET - -# Used by archs to tell that they support BPF JIT compiler plus which flavour. -# Only one of the two can be selected for a specific arch since eBPF JIT supersedes -# the cBPF JIT. - -# Classic BPF JIT (cBPF) -config HAVE_CBPF_JIT - bool - -# Extended BPF JIT (eBPF) -config HAVE_EBPF_JIT - bool -- GitLab From 08389d888287c3823f80b0216766b71e17f0aba5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 11 May 2021 22:35:17 +0200 Subject: [PATCH 0394/3804] bpf: Add kconfig knob for disabling unpriv bpf by default Add a kconfig knob which allows for unprivileged bpf to be disabled by default. If set, the knob sets /proc/sys/kernel/unprivileged_bpf_disabled to value of 2. This still allows a transition of 2 -> {0,1} through an admin. Similarly, this also still keeps 1 -> {1} behavior intact, so that once set to permanently disabled, it cannot be undone aside from a reboot. We've also added extra2 with max of 2 for the procfs handler, so that an admin still has a chance to toggle between 0 <-> 2. Either way, as an additional alternative, applications can make use of CAP_BPF that we added a while ago. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/74ec548079189e4e4dffaeb42b8987bb3c852eee.1620765074.git.daniel@iogearbox.net --- Documentation/admin-guide/sysctl/kernel.rst | 17 +++++++++--- kernel/bpf/Kconfig | 10 +++++++ kernel/bpf/syscall.c | 3 ++- kernel/sysctl.c | 29 +++++++++++++++++---- 4 files changed, 50 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 1d56a6b73a4e9..24ab20d7a50ad 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1457,11 +1457,22 @@ unprivileged_bpf_disabled ========================= Writing 1 to this entry will disable unprivileged calls to ``bpf()``; -once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` will return -``-EPERM``. +once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` or ``CAP_BPF`` +will return ``-EPERM``. Once set to 1, this can't be cleared from the +running kernel anymore. -Once set, this can't be cleared. +Writing 2 to this entry will also disable unprivileged calls to ``bpf()``, +however, an admin can still change this setting later on, if needed, by +writing 0 or 1 to this entry. +If ``BPF_UNPRIV_DEFAULT_OFF`` is enabled in the kernel config, then this +entry will default to 2 instead of 0. + += ============================================================= +0 Unprivileged calls to ``bpf()`` are enabled +1 Unprivileged calls to ``bpf()`` are disabled without recovery +2 Unprivileged calls to ``bpf()`` are disabled += ============================================================= watchdog ======== diff --git a/kernel/bpf/Kconfig b/kernel/bpf/Kconfig index b4edaefc62555..26b591e23f16a 100644 --- a/kernel/bpf/Kconfig +++ b/kernel/bpf/Kconfig @@ -61,6 +61,16 @@ config BPF_JIT_DEFAULT_ON def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON depends on HAVE_EBPF_JIT && BPF_JIT +config BPF_UNPRIV_DEFAULT_OFF + bool "Disable unprivileged BPF by default" + depends on BPF_SYSCALL + help + Disables unprivileged BPF by default by setting the corresponding + /proc/sys/kernel/unprivileged_bpf_disabled knob to 2. An admin can + still reenable it by setting it to 0 later on, or permanently + disable it by setting it to 1 (from which no other transition to + 0 is possible anymore). + source "kernel/bpf/preload/Kconfig" config BPF_LSM diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 941ca06d9dfa1..ea04b0deb5ce4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -50,7 +50,8 @@ static DEFINE_SPINLOCK(map_idr_lock); static DEFINE_IDR(link_idr); static DEFINE_SPINLOCK(link_idr_lock); -int sysctl_unprivileged_bpf_disabled __read_mostly; +int sysctl_unprivileged_bpf_disabled __read_mostly = + IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0; static const struct bpf_map_ops * const bpf_map_types[] = { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index f91d327273c1b..6df7c81f7cdd1 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -225,7 +225,27 @@ static int bpf_stats_handler(struct ctl_table *table, int write, mutex_unlock(&bpf_stats_enabled_mutex); return ret; } -#endif + +static int bpf_unpriv_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret, unpriv_enable = *(int *)table->data; + bool locked_state = unpriv_enable == 1; + struct ctl_table tmp = *table; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + tmp.data = &unpriv_enable; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) { + if (locked_state && unpriv_enable != 1) + return -EPERM; + *(int *)table->data = unpriv_enable; + } + return ret; +} +#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */ /* * /proc/sys support @@ -2600,10 +2620,9 @@ static struct ctl_table kern_table[] = { .data = &sysctl_unprivileged_bpf_disabled, .maxlen = sizeof(sysctl_unprivileged_bpf_disabled), .mode = 0644, - /* only handle a transition from default "0" to "1" */ - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ONE, - .extra2 = SYSCTL_ONE, + .proc_handler = bpf_unpriv_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = &two, }, { .procname = "bpf_stats_enabled", -- GitLab From 35e3815fa8102fab4dee75f3547472c66581125d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 29 Apr 2021 13:47:12 +0200 Subject: [PATCH 0395/3804] bpf: Add deny list of btf ids check for tracing programs The recursion check in __bpf_prog_enter and __bpf_prog_exit leaves some (not inlined) functions unprotected: In __bpf_prog_enter: - migrate_disable is called before prog->active is checked In __bpf_prog_exit: - migrate_enable,rcu_read_unlock_strict are called after prog->active is decreased When attaching trampoline to them we get panic like: traps: PANIC: double fault, error_code: 0x0 double fault: 0000 [#1] SMP PTI RIP: 0010:__bpf_prog_enter+0x4/0x50 ... Call Trace: bpf_trampoline_6442466513_0+0x18/0x1000 migrate_disable+0x5/0x50 __bpf_prog_enter+0x9/0x50 bpf_trampoline_6442466513_0+0x18/0x1000 migrate_disable+0x5/0x50 __bpf_prog_enter+0x9/0x50 bpf_trampoline_6442466513_0+0x18/0x1000 migrate_disable+0x5/0x50 __bpf_prog_enter+0x9/0x50 bpf_trampoline_6442466513_0+0x18/0x1000 migrate_disable+0x5/0x50 ... Fixing this by adding deny list of btf ids for tracing programs and checking btf id during program verification. Adding above functions to this list. Suggested-by: Alexei Starovoitov Signed-off-by: Jiri Olsa Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210429114712.43783-1-jolsa@kernel.org --- kernel/bpf/verifier.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9352a1b7de2dd..c58598ef4b5b5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -13196,6 +13196,17 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, return 0; } +BTF_SET_START(btf_id_deny) +BTF_ID_UNUSED +#ifdef CONFIG_SMP +BTF_ID(func, migrate_disable) +BTF_ID(func, migrate_enable) +#endif +#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU +BTF_ID(func, rcu_read_unlock_strict) +#endif +BTF_SET_END(btf_id_deny) + static int check_attach_btf_id(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; @@ -13255,6 +13266,9 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) ret = bpf_lsm_verify_prog(&env->log, prog); if (ret < 0) return ret; + } else if (prog->type == BPF_PROG_TYPE_TRACING && + btf_id_set_contains(&btf_id_deny, btf_id)) { + return -EINVAL; } key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id); -- GitLab From e2d5b2bb769fa5f500760caba76436ba3a10a895 Mon Sep 17 00:00:00 2001 From: Florent Revest Date: Tue, 11 May 2021 10:10:54 +0200 Subject: [PATCH 0396/3804] bpf: Fix nested bpf_bprintf_prepare with more per-cpu buffers The bpf_seq_printf, bpf_trace_printk and bpf_snprintf helpers share one per-cpu buffer that they use to store temporary data (arguments to bprintf). They "get" that buffer with try_get_fmt_tmp_buf and "put" it by the end of their scope with bpf_bprintf_cleanup. If one of these helpers gets called within the scope of one of these helpers, for example: a first bpf program gets called, uses bpf_trace_printk which calls raw_spin_lock_irqsave which is traced by another bpf program that calls bpf_snprintf, then the second "get" fails. Essentially, these helpers are not re-entrant. They would return -EBUSY and print a warning message once. This patch triples the number of bprintf buffers to allow three levels of nesting. This is very similar to what was done for tracepoints in "9594dc3c7e7 bpf: fix nested bpf tracepoints with per-cpu data" Fixes: d9c9e4db186a ("bpf: Factorize bpf_trace_printk and bpf_seq_printf") Reported-by: syzbot+63122d0bc347f18c1884@syzkaller.appspotmail.com Signed-off-by: Florent Revest Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20210511081054.2125874-1-revest@chromium.org --- kernel/bpf/helpers.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 544773970dbc6..ef658a9ea5c93 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -696,34 +696,35 @@ static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype, */ #define MAX_PRINTF_BUF_LEN 512 -struct bpf_printf_buf { - char tmp_buf[MAX_PRINTF_BUF_LEN]; +/* Support executing three nested bprintf helper calls on a given CPU */ +struct bpf_bprintf_buffers { + char tmp_bufs[3][MAX_PRINTF_BUF_LEN]; }; -static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf); -static DEFINE_PER_CPU(int, bpf_printf_buf_used); +static DEFINE_PER_CPU(struct bpf_bprintf_buffers, bpf_bprintf_bufs); +static DEFINE_PER_CPU(int, bpf_bprintf_nest_level); static int try_get_fmt_tmp_buf(char **tmp_buf) { - struct bpf_printf_buf *bufs; - int used; + struct bpf_bprintf_buffers *bufs; + int nest_level; preempt_disable(); - used = this_cpu_inc_return(bpf_printf_buf_used); - if (WARN_ON_ONCE(used > 1)) { - this_cpu_dec(bpf_printf_buf_used); + nest_level = this_cpu_inc_return(bpf_bprintf_nest_level); + if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bufs->tmp_bufs))) { + this_cpu_dec(bpf_bprintf_nest_level); preempt_enable(); return -EBUSY; } - bufs = this_cpu_ptr(&bpf_printf_buf); - *tmp_buf = bufs->tmp_buf; + bufs = this_cpu_ptr(&bpf_bprintf_bufs); + *tmp_buf = bufs->tmp_bufs[nest_level - 1]; return 0; } void bpf_bprintf_cleanup(void) { - if (this_cpu_read(bpf_printf_buf_used)) { - this_cpu_dec(bpf_printf_buf_used); + if (this_cpu_read(bpf_bprintf_nest_level)) { + this_cpu_dec(bpf_bprintf_nest_level); preempt_enable(); } } -- GitLab From 67e7ec0bd4535fc6e6d3f5d174f80e10a8a80c6e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sat, 8 May 2021 12:22:12 -0300 Subject: [PATCH 0397/3804] libbpf: Provide GELF_ST_VISIBILITY() define for older libelf Where that macro isn't available. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/YJaspEh0qZr4LYOc@kernel.org --- tools/lib/bpf/libbpf_internal.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index ee426226928f1..acbcf6c7bdf82 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -41,6 +41,11 @@ #define ELF_C_READ_MMAP ELF_C_READ #endif +/* Older libelf all end up in this expression, for both 32 and 64 bit */ +#ifndef GELF_ST_VISIBILITY +#define GELF_ST_VISIBILITY(o) ((o) & 0x03) +#endif + #define BTF_INFO_ENC(kind, kind_flag, vlen) \ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) #define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type) -- GitLab From 096eccdef0b32f47e9354231ddc3aaaf9527d51c Mon Sep 17 00:00:00 2001 From: Jussi Maki Date: Wed, 5 May 2021 08:59:25 +0000 Subject: [PATCH 0398/3804] selftests/bpf: Rewrite test_tc_redirect.sh as prog_tests/tc_redirect.c As discussed in [0], this ports test_tc_redirect.sh to the test_progs framework and removes the old test. This makes it more in line with rest of the tests and makes it possible to run this test case with vmtest.sh and under the bpf CI. The upcoming skb_change_head() helper fix in [0] is depending on it and extending the test case to redirect a packet from L3 device to veth. [0] https://lore.kernel.org/bpf/20210427135550.807355-1-joamaki@gmail.com Signed-off-by: Jussi Maki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210505085925.783985-1-joamaki@gmail.com --- tools/testing/selftests/bpf/network_helpers.c | 2 +- tools/testing/selftests/bpf/network_helpers.h | 1 + .../selftests/bpf/prog_tests/tc_redirect.c | 589 ++++++++++++++++++ .../selftests/bpf/progs/test_tc_neigh.c | 33 +- .../selftests/bpf/progs/test_tc_neigh_fib.c | 9 +- .../selftests/bpf/progs/test_tc_peer.c | 33 +- .../testing/selftests/bpf/test_tc_redirect.sh | 216 ------- 7 files changed, 617 insertions(+), 266 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/tc_redirect.c delete mode 100755 tools/testing/selftests/bpf/test_tc_redirect.sh diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 12ee40284da02..2060bc122c530 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -40,7 +40,7 @@ struct ipv6_packet pkt_v6 = { .tcp.doff = 5, }; -static int settimeo(int fd, int timeout_ms) +int settimeo(int fd, int timeout_ms) { struct timeval timeout = { .tv_sec = 3 }; diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 7205f8afdba11..5e0d51c07b632 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -33,6 +33,7 @@ struct ipv6_packet { } __packed; extern struct ipv6_packet pkt_v6; +int settimeo(int fd, int timeout_ms); int start_server(int family, int type, const char *addr, __u16 port, int timeout_ms); int connect_to_fd(int server_fd, int timeout_ms); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c new file mode 100644 index 0000000000000..95ef9fcd31d8b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -0,0 +1,589 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link + * between src and dst. The netns fwd has veth links to each src and dst. The + * client is in src and server in dst. The test installs a TC BPF program to each + * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the + * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace + * switch from ingress side; it also installs a checker prog on the egress side + * to drop unexpected traffic. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test_progs.h" +#include "network_helpers.h" +#include "test_tc_neigh_fib.skel.h" +#include "test_tc_neigh.skel.h" +#include "test_tc_peer.skel.h" + +#define NS_SRC "ns_src" +#define NS_FWD "ns_fwd" +#define NS_DST "ns_dst" + +#define IP4_SRC "172.16.1.100" +#define IP4_DST "172.16.2.100" +#define IP4_PORT 9004 + +#define IP6_SRC "::1:dead:beef:cafe" +#define IP6_DST "::2:dead:beef:cafe" +#define IP6_PORT 9006 + +#define IP4_SLL "169.254.0.1" +#define IP4_DLL "169.254.0.2" +#define IP4_NET "169.254.0.0" + +#define IFADDR_STR_LEN 18 +#define PING_ARGS "-c 3 -w 10 -q" + +#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src" +#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst" +#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk" + +#define TIMEOUT_MILLIS 10000 + +#define MAX_PROC_MODS 128 +#define MAX_PROC_VALUE_LEN 16 + +#define log_err(MSG, ...) \ + fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ + __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__) + +struct proc_mod { + char path[PATH_MAX]; + char oldval[MAX_PROC_VALUE_LEN]; + int oldlen; +}; + +static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL}; +static int root_netns_fd = -1; +static int num_proc_mods; +static struct proc_mod proc_mods[MAX_PROC_MODS]; + +/** + * modify_proc() - Modify entry in /proc + * + * Modifies an entry in /proc and saves the original value for later + * restoration with restore_proc(). + */ +static int modify_proc(const char *path, const char *newval) +{ + struct proc_mod *mod; + FILE *f; + + if (num_proc_mods + 1 > MAX_PROC_MODS) + return -1; + + f = fopen(path, "r+"); + if (!f) + return -1; + + mod = &proc_mods[num_proc_mods]; + num_proc_mods++; + + strncpy(mod->path, path, PATH_MAX); + + if (!fread(mod->oldval, 1, MAX_PROC_VALUE_LEN, f)) { + log_err("reading from %s failed", path); + goto fail; + } + rewind(f); + if (fwrite(newval, strlen(newval), 1, f) != 1) { + log_err("writing to %s failed", path); + goto fail; + } + + fclose(f); + return 0; + +fail: + fclose(f); + num_proc_mods--; + return -1; +} + +/** + * restore_proc() - Restore all /proc modifications + */ +static void restore_proc(void) +{ + int i; + + for (i = 0; i < num_proc_mods; i++) { + struct proc_mod *mod = &proc_mods[i]; + FILE *f; + + f = fopen(mod->path, "w"); + if (!f) { + log_err("fopen of %s failed", mod->path); + continue; + } + + if (fwrite(mod->oldval, mod->oldlen, 1, f) != 1) + log_err("fwrite to %s failed", mod->path); + + fclose(f); + } + num_proc_mods = 0; +} + +/** + * setns_by_name() - Set networks namespace by name + */ +static int setns_by_name(const char *name) +{ + int nsfd; + char nspath[PATH_MAX]; + int err; + + snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); + nsfd = open(nspath, O_RDONLY | O_CLOEXEC); + if (nsfd < 0) + return nsfd; + + err = setns(nsfd, CLONE_NEWNET); + close(nsfd); + + return err; +} + +/** + * setns_root() - Set network namespace to original (root) namespace + * + * Not expected to ever fail, so error not returned, but failure logged + * and test marked as failed. + */ +static void setns_root(void) +{ + ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "setns root"); +} + +static int netns_setup_namespaces(const char *verb) +{ + const char * const *ns = namespaces; + char cmd[128]; + + while (*ns) { + snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns); + if (!ASSERT_OK(system(cmd), cmd)) + return -1; + ns++; + } + return 0; +} + +struct netns_setup_result { + int ifindex_veth_src_fwd; + int ifindex_veth_dst_fwd; +}; + +static int get_ifaddr(const char *name, char *ifaddr) +{ + char path[PATH_MAX]; + FILE *f; + int ret; + + snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name); + f = fopen(path, "r"); + if (!ASSERT_OK_PTR(f, path)) + return -1; + + ret = fread(ifaddr, 1, IFADDR_STR_LEN, f); + if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) { + fclose(f); + return -1; + } + fclose(f); + return 0; +} + +static int get_ifindex(const char *name) +{ + char path[PATH_MAX]; + char buf[32]; + FILE *f; + int ret; + + snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name); + f = fopen(path, "r"); + if (!ASSERT_OK_PTR(f, path)) + return -1; + + ret = fread(buf, 1, sizeof(buf), f); + if (!ASSERT_GT(ret, 0, "fread ifindex")) { + fclose(f); + return -1; + } + fclose(f); + return atoi(buf); +} + +#define SYS(fmt, ...) \ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + goto fail; \ + }) + +static int netns_setup_links_and_routes(struct netns_setup_result *result) +{ + char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {}; + char veth_dst_fwd_addr[IFADDR_STR_LEN+1] = {}; + + SYS("ip link add veth_src type veth peer name veth_src_fwd"); + SYS("ip link add veth_dst type veth peer name veth_dst_fwd"); + if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr)) + goto fail; + if (get_ifaddr("veth_dst_fwd", veth_dst_fwd_addr)) + goto fail; + + result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd"); + if (result->ifindex_veth_src_fwd < 0) + goto fail; + result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd"); + if (result->ifindex_veth_dst_fwd < 0) + goto fail; + + SYS("ip link set veth_src netns " NS_SRC); + SYS("ip link set veth_src_fwd netns " NS_FWD); + SYS("ip link set veth_dst_fwd netns " NS_FWD); + SYS("ip link set veth_dst netns " NS_DST); + + /** setup in 'src' namespace */ + if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src")) + goto fail; + + SYS("ip addr add " IP4_SRC "/32 dev veth_src"); + SYS("ip addr add " IP6_SRC "/128 dev veth_src nodad"); + SYS("ip link set dev veth_src up"); + + SYS("ip route add " IP4_DST "/32 dev veth_src scope global"); + SYS("ip route add " IP4_NET "/16 dev veth_src scope global"); + SYS("ip route add " IP6_DST "/128 dev veth_src scope global"); + + SYS("ip neigh add " IP4_DST " dev veth_src lladdr %s", + veth_src_fwd_addr); + SYS("ip neigh add " IP6_DST " dev veth_src lladdr %s", + veth_src_fwd_addr); + + /** setup in 'fwd' namespace */ + if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd")) + goto fail; + + /* The fwd netns automatically gets a v6 LL address / routes, but also + * needs v4 one in order to start ARP probing. IP4_NET route is added + * to the endpoints so that the ARP processing will reply. + */ + SYS("ip addr add " IP4_SLL "/32 dev veth_src_fwd"); + SYS("ip addr add " IP4_DLL "/32 dev veth_dst_fwd"); + SYS("ip link set dev veth_src_fwd up"); + SYS("ip link set dev veth_dst_fwd up"); + + SYS("ip route add " IP4_SRC "/32 dev veth_src_fwd scope global"); + SYS("ip route add " IP6_SRC "/128 dev veth_src_fwd scope global"); + SYS("ip route add " IP4_DST "/32 dev veth_dst_fwd scope global"); + SYS("ip route add " IP6_DST "/128 dev veth_dst_fwd scope global"); + + /** setup in 'dst' namespace */ + if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst")) + goto fail; + + SYS("ip addr add " IP4_DST "/32 dev veth_dst"); + SYS("ip addr add " IP6_DST "/128 dev veth_dst nodad"); + SYS("ip link set dev veth_dst up"); + + SYS("ip route add " IP4_SRC "/32 dev veth_dst scope global"); + SYS("ip route add " IP4_NET "/16 dev veth_dst scope global"); + SYS("ip route add " IP6_SRC "/128 dev veth_dst scope global"); + + SYS("ip neigh add " IP4_SRC " dev veth_dst lladdr %s", + veth_dst_fwd_addr); + SYS("ip neigh add " IP6_SRC " dev veth_dst lladdr %s", + veth_dst_fwd_addr); + + setns_root(); + return 0; +fail: + setns_root(); + return -1; +} + +static int netns_load_bpf(void) +{ + if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd")) + return -1; + + SYS("tc qdisc add dev veth_src_fwd clsact"); + SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned " + SRC_PROG_PIN_FILE); + SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned " + CHK_PROG_PIN_FILE); + + SYS("tc qdisc add dev veth_dst_fwd clsact"); + SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned " + DST_PROG_PIN_FILE); + SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned " + CHK_PROG_PIN_FILE); + + setns_root(); + return -1; +fail: + setns_root(); + return -1; +} + +static int netns_unload_bpf(void) +{ + if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd")) + goto fail; + SYS("tc qdisc delete dev veth_src_fwd clsact"); + SYS("tc qdisc delete dev veth_dst_fwd clsact"); + + setns_root(); + return 0; +fail: + setns_root(); + return -1; +} + + +static void test_tcp(int family, const char *addr, __u16 port) +{ + int listen_fd = -1, accept_fd = -1, client_fd = -1; + char buf[] = "testing testing"; + int n; + + if (!ASSERT_OK(setns_by_name(NS_DST), "setns dst")) + return; + + listen_fd = start_server(family, SOCK_STREAM, addr, port, 0); + if (!ASSERT_GE(listen_fd, 0, "listen")) + goto done; + + if (!ASSERT_OK(setns_by_name(NS_SRC), "setns src")) + goto done; + + client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS); + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto done; + + accept_fd = accept(listen_fd, NULL, NULL); + if (!ASSERT_GE(accept_fd, 0, "accept")) + goto done; + + if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo")) + goto done; + + n = write(client_fd, buf, sizeof(buf)); + if (!ASSERT_EQ(n, sizeof(buf), "send to server")) + goto done; + + n = read(accept_fd, buf, sizeof(buf)); + ASSERT_EQ(n, sizeof(buf), "recv from server"); + +done: + setns_root(); + if (listen_fd >= 0) + close(listen_fd); + if (accept_fd >= 0) + close(accept_fd); + if (client_fd >= 0) + close(client_fd); +} + +static int test_ping(int family, const char *addr) +{ + const char *ping = family == AF_INET6 ? "ping6" : "ping"; + + SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s", ping, addr); + return 0; +fail: + return -1; +} + +static void test_connectivity(void) +{ + test_tcp(AF_INET, IP4_DST, IP4_PORT); + test_ping(AF_INET, IP4_DST); + test_tcp(AF_INET6, IP6_DST, IP6_PORT); + test_ping(AF_INET6, IP6_DST); +} + +static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) +{ + struct test_tc_neigh_fib *skel; + int err; + + skel = test_tc_neigh_fib__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open")) + return; + + if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load")) { + test_tc_neigh_fib__destroy(skel); + return; + } + + err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) + goto done; + + if (netns_load_bpf()) + goto done; + + /* bpf_fib_lookup() checks if forwarding is enabled */ + if (!ASSERT_OK(setns_by_name(NS_FWD), "setns fwd")) + goto done; + + err = modify_proc("/proc/sys/net/ipv4/ip_forward", "1"); + if (!ASSERT_OK(err, "set ipv4.ip_forward")) + goto done; + + err = modify_proc("/proc/sys/net/ipv6/conf/all/forwarding", "1"); + if (!ASSERT_OK(err, "set ipv6.forwarding")) + goto done; + setns_root(); + + test_connectivity(); +done: + bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + test_tc_neigh_fib__destroy(skel); + netns_unload_bpf(); + setns_root(); + restore_proc(); +} + +static void test_tc_redirect_neigh(struct netns_setup_result *setup_result) +{ + struct test_tc_neigh *skel; + int err; + + skel = test_tc_neigh__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open")) + return; + + skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + + err = test_tc_neigh__load(skel); + if (!ASSERT_OK(err, "test_tc_neigh__load")) { + test_tc_neigh__destroy(skel); + return; + } + + err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) + goto done; + + if (netns_load_bpf()) + goto done; + + test_connectivity(); + +done: + bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + test_tc_neigh__destroy(skel); + netns_unload_bpf(); + setns_root(); +} + +static void test_tc_redirect_peer(struct netns_setup_result *setup_result) +{ + struct test_tc_peer *skel; + int err; + + skel = test_tc_peer__open(); + if (!ASSERT_OK_PTR(skel, "test_tc_peer__open")) + return; + + skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd; + skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd; + + err = test_tc_peer__load(skel); + if (!ASSERT_OK(err, "test_tc_peer__load")) { + test_tc_peer__destroy(skel); + return; + } + + err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE)) + goto done; + + err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE)) + goto done; + + if (netns_load_bpf()) + goto done; + + test_connectivity(); + +done: + bpf_program__unpin(skel->progs.tc_src, SRC_PROG_PIN_FILE); + bpf_program__unpin(skel->progs.tc_chk, CHK_PROG_PIN_FILE); + bpf_program__unpin(skel->progs.tc_dst, DST_PROG_PIN_FILE); + test_tc_peer__destroy(skel); + netns_unload_bpf(); + setns_root(); +} + +void test_tc_redirect(void) +{ + struct netns_setup_result setup_result; + + root_netns_fd = open("/proc/self/ns/net", O_RDONLY); + if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net")) + return; + + if (netns_setup_namespaces("add")) + goto done; + + if (netns_setup_links_and_routes(&setup_result)) + goto done; + + if (test__start_subtest("tc_redirect_peer")) + test_tc_redirect_peer(&setup_result); + + if (test__start_subtest("tc_redirect_neigh")) + test_tc_redirect_neigh(&setup_result); + + if (test__start_subtest("tc_redirect_neigh_fib")) + test_tc_redirect_neigh_fib(&setup_result); + +done: + close(root_netns_fd); + netns_setup_namespaces("delete"); +} diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c index b985ac4e7a814..90f64a85998fa 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c @@ -33,17 +33,8 @@ a.s6_addr32[3] == b.s6_addr32[3]) #endif -enum { - dev_src, - dev_dst, -}; - -struct bpf_map_def SEC("maps") ifindex_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 2, -}; +static volatile const __u32 IFINDEX_SRC; +static volatile const __u32 IFINDEX_DST; static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb, __be32 addr) @@ -79,14 +70,8 @@ static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb, return v6_equal(ip6h->daddr, addr); } -static __always_inline int get_dev_ifindex(int which) -{ - int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); - - return ifindex ? *ifindex : 0; -} - -SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +SEC("classifier/chk_egress") +int tc_chk(struct __sk_buff *skb) { void *data_end = ctx_ptr(skb->data_end); void *data = ctx_ptr(skb->data); @@ -98,7 +83,8 @@ SEC("chk_egress") int tc_chk(struct __sk_buff *skb) return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK; } -SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +SEC("classifier/dst_ingress") +int tc_dst(struct __sk_buff *skb) { __u8 zero[ETH_ALEN * 2]; bool redirect = false; @@ -119,10 +105,11 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) return TC_ACT_SHOT; - return bpf_redirect_neigh(get_dev_ifindex(dev_src), NULL, 0, 0); + return bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0); } -SEC("src_ingress") int tc_src(struct __sk_buff *skb) +SEC("classifier/src_ingress") +int tc_src(struct __sk_buff *skb) { __u8 zero[ETH_ALEN * 2]; bool redirect = false; @@ -143,7 +130,7 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb) if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) return TC_ACT_SHOT; - return bpf_redirect_neigh(get_dev_ifindex(dev_dst), NULL, 0, 0); + return bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0); } char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c index d82ed3457030f..f7ab69cf018e5 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c @@ -75,7 +75,8 @@ static __always_inline int fill_fib_params_v6(struct __sk_buff *skb, return 0; } -SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +SEC("classifier/chk_egress") +int tc_chk(struct __sk_buff *skb) { void *data_end = ctx_ptr(skb->data_end); void *data = ctx_ptr(skb->data); @@ -142,12 +143,14 @@ static __always_inline int tc_redir(struct __sk_buff *skb) /* these are identical, but keep them separate for compatibility with the * section names expected by test_tc_redirect.sh */ -SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +SEC("classifier/dst_ingress") +int tc_dst(struct __sk_buff *skb) { return tc_redir(skb); } -SEC("src_ingress") int tc_src(struct __sk_buff *skb) +SEC("classifier/src_ingress") +int tc_src(struct __sk_buff *skb) { return tc_redir(skb); } diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c index fc84a7685aa2c..72c72950c3bbe 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_peer.c +++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c @@ -8,38 +8,25 @@ #include -enum { - dev_src, - dev_dst, -}; +static volatile const __u32 IFINDEX_SRC; +static volatile const __u32 IFINDEX_DST; -struct bpf_map_def SEC("maps") ifindex_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(int), - .max_entries = 2, -}; - -static __always_inline int get_dev_ifindex(int which) -{ - int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); - - return ifindex ? *ifindex : 0; -} - -SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +SEC("classifier/chk_egress") +int tc_chk(struct __sk_buff *skb) { return TC_ACT_SHOT; } -SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +SEC("classifier/dst_ingress") +int tc_dst(struct __sk_buff *skb) { - return bpf_redirect_peer(get_dev_ifindex(dev_src), 0); + return bpf_redirect_peer(IFINDEX_SRC, 0); } -SEC("src_ingress") int tc_src(struct __sk_buff *skb) +SEC("classifier/src_ingress") +int tc_src(struct __sk_buff *skb) { - return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0); + return bpf_redirect_peer(IFINDEX_DST, 0); } char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh deleted file mode 100755 index 8868aa1ca9021..0000000000000 --- a/tools/testing/selftests/bpf/test_tc_redirect.sh +++ /dev/null @@ -1,216 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link -# between src and dst. The netns fwd has veth links to each src and dst. The -# client is in src and server in dst. The test installs a TC BPF program to each -# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the -# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace -# switch from ingress side; it also installs a checker prog on the egress side -# to drop unexpected traffic. - -if [[ $EUID -ne 0 ]]; then - echo "This script must be run as root" - echo "FAIL" - exit 1 -fi - -# check that needed tools are present -command -v nc >/dev/null 2>&1 || \ - { echo >&2 "nc is not available"; exit 1; } -command -v dd >/dev/null 2>&1 || \ - { echo >&2 "dd is not available"; exit 1; } -command -v timeout >/dev/null 2>&1 || \ - { echo >&2 "timeout is not available"; exit 1; } -command -v ping >/dev/null 2>&1 || \ - { echo >&2 "ping is not available"; exit 1; } -if command -v ping6 >/dev/null 2>&1; then PING6=ping6; else PING6=ping; fi -command -v perl >/dev/null 2>&1 || \ - { echo >&2 "perl is not available"; exit 1; } -command -v jq >/dev/null 2>&1 || \ - { echo >&2 "jq is not available"; exit 1; } -command -v bpftool >/dev/null 2>&1 || \ - { echo >&2 "bpftool is not available"; exit 1; } - -readonly GREEN='\033[0;92m' -readonly RED='\033[0;31m' -readonly NC='\033[0m' # No Color - -readonly PING_ARG="-c 3 -w 10 -q" - -readonly TIMEOUT=10 - -readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)" -readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)" -readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)" - -readonly IP4_SRC="172.16.1.100" -readonly IP4_DST="172.16.2.100" - -readonly IP6_SRC="::1:dead:beef:cafe" -readonly IP6_DST="::2:dead:beef:cafe" - -readonly IP4_SLL="169.254.0.1" -readonly IP4_DLL="169.254.0.2" -readonly IP4_NET="169.254.0.0" - -netns_cleanup() -{ - ip netns del ${NS_SRC} - ip netns del ${NS_FWD} - ip netns del ${NS_DST} -} - -netns_setup() -{ - ip netns add "${NS_SRC}" - ip netns add "${NS_FWD}" - ip netns add "${NS_DST}" - - ip link add veth_src type veth peer name veth_src_fwd - ip link add veth_dst type veth peer name veth_dst_fwd - - ip link set veth_src netns ${NS_SRC} - ip link set veth_src_fwd netns ${NS_FWD} - - ip link set veth_dst netns ${NS_DST} - ip link set veth_dst_fwd netns ${NS_FWD} - - ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src - ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst - - # The fwd netns automatically get a v6 LL address / routes, but also - # needs v4 one in order to start ARP probing. IP4_NET route is added - # to the endpoints so that the ARP processing will reply. - - ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd - ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd - - ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad - ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad - - ip -netns ${NS_SRC} link set dev veth_src up - ip -netns ${NS_FWD} link set dev veth_src_fwd up - - ip -netns ${NS_DST} link set dev veth_dst up - ip -netns ${NS_FWD} link set dev veth_dst_fwd up - - ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global - ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global - ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global - - ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global - ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global - - ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global - ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global - ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global - - ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global - ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global - - fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address) - fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address) - - ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src - ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst - - ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src - ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst -} - -netns_test_connectivity() -{ - set +e - - ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &" - ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &" - - TEST="TCPv4 connectivity test" - ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004" - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - TEST="TCPv6 connectivity test" - ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006" - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - TEST="ICMPv4 connectivity test" - ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST} - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - TEST="ICMPv6 connectivity test" - ip netns exec ${NS_SRC} $PING6 $PING_ARG ${IP6_DST} - if [ $? -ne 0 ]; then - echo -e "${TEST}: ${RED}FAIL${NC}" - exit 1 - fi - echo -e "${TEST}: ${GREEN}PASS${NC}" - - set -e -} - -hex_mem_str() -{ - perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1 -} - -netns_setup_bpf() -{ - local obj=$1 - local use_forwarding=${2:-0} - - ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact - ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress - ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress - - ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact - ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress - ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress - - if [ "$use_forwarding" -eq "1" ]; then - # bpf_fib_lookup() checks if forwarding is enabled - ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1 - ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1 - ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=1 - return 0 - fi - - veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex) - veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex) - - progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]') - for prog in $progs; do - map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]') - if [ ! -z "$map" ]; then - bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src) - bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst) - fi - done -} - -trap netns_cleanup EXIT -set -e - -netns_setup -netns_setup_bpf test_tc_neigh.o -netns_test_connectivity -netns_cleanup -netns_setup -netns_setup_bpf test_tc_neigh_fib.o 1 -netns_test_connectivity -netns_cleanup -netns_setup -netns_setup_bpf test_tc_peer.o -netns_test_connectivity -- GitLab From 569c484f9995f489f2b80dd134269fe07d2b900d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 7 May 2021 17:50:11 -0700 Subject: [PATCH 0399/3804] bpf: Limit static tcp-cc functions in the .BTF_ids list to x86 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During the discussion in [0]. It was pointed out that static functions in ppc64 is prefixed with ".". For example, the 'readelf -s vmlinux.ppc': 89326: c000000001383280 24 NOTYPE LOCAL DEFAULT 31 cubictcp_init 89327: c000000000c97c50 168 FUNC LOCAL DEFAULT 2 .cubictcp_init The one with FUNC type is ".cubictcp_init" instead of "cubictcp_init". The "." seems to be done by arch/powerpc/include/asm/ppc_asm.h. This caused that pahole cannot generate the BTF for these tcp-cc kernel functions because pahole only captures the FUNC type and "cubictcp_init" is not. It then failed the kernel compilation in ppc64. This behavior is only reported in ppc64 so far. I tried arm64, s390, and sparc64 and did not observe this "." prefix and NOTYPE behavior. Since the kfunc call is only supported in the x86_64 and x86_32 JIT, this patch limits those tcp-cc functions to x86 only to avoid unnecessary compilation issue in other ARCHs. In the future, we can examine if it is better to change all those functions from static to extern. [0] https://lore.kernel.org/bpf/4e051459-8532-7b61-c815-f3435767f8a0@kernel.org/ Fixes: e78aea8b2170 ("bpf: tcp: Put some tcp cong functions in allowlist for bpf-tcp-cc") Signed-off-by: Martin KaFai Lau Signed-off-by: Daniel Borkmann Cc: Michal Suchánek Cc: Jiri Slaby Cc: Jiri Olsa Link: https://lore.kernel.org/bpf/20210508005011.3863757-1-kafai@fb.com --- net/ipv4/bpf_tcp_ca.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index dff4f0eb96b0f..9e41eff4a6858 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -185,6 +185,7 @@ BTF_ID(func, tcp_reno_cong_avoid) BTF_ID(func, tcp_reno_undo_cwnd) BTF_ID(func, tcp_slow_start) BTF_ID(func, tcp_cong_avoid_ai) +#ifdef CONFIG_X86 #ifdef CONFIG_DYNAMIC_FTRACE #if IS_BUILTIN(CONFIG_TCP_CONG_CUBIC) BTF_ID(func, cubictcp_init) @@ -213,6 +214,7 @@ BTF_ID(func, bbr_min_tso_segs) BTF_ID(func, bbr_set_state) #endif #endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_X86 */ BTF_SET_END(bpf_tcp_ca_kfunc_ids) static bool bpf_tcp_ca_check_kfunc_call(u32 kfunc_btf_id) -- GitLab From 349c4d6c75d74b62d8e39913b40bd06117b85e4a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 9 May 2021 21:53:03 -0700 Subject: [PATCH 0400/3804] f2fs: avoid null pointer access when handling IPU error Unable to handle kernel NULL pointer dereference at virtual address 000000000000001a pc : f2fs_inplace_write_data+0x144/0x208 lr : f2fs_inplace_write_data+0x134/0x208 Call trace: f2fs_inplace_write_data+0x144/0x208 f2fs_do_write_data_page+0x270/0x770 f2fs_write_single_data_page+0x47c/0x830 __f2fs_write_data_pages+0x444/0x98c f2fs_write_data_pages.llvm.16514453770497736882+0x2c/0x38 do_writepages+0x58/0x118 __writeback_single_inode+0x44/0x300 writeback_sb_inodes+0x4b8/0x9c8 wb_writeback+0x148/0x42c wb_do_writeback+0xc8/0x390 wb_workfn+0xb0/0x2f4 process_one_work+0x1fc/0x444 worker_thread+0x268/0x4b4 kthread+0x13c/0x158 ret_from_fork+0x10/0x18 Fixes: 955772787667 ("f2fs: drop inplace IO if fs status is abnormal") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c605415840b59..51dc79fad4fe2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3574,12 +3574,12 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) return err; drop_bio: - if (fio->bio) { + if (fio->bio && *(fio->bio)) { struct bio *bio = *(fio->bio); bio->bi_status = BLK_STS_IOERR; bio_endio(bio); - fio->bio = NULL; + *(fio->bio) = NULL; } return err; } -- GitLab From a753103909a7e3d22147505d944da3d20759e1a5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 6 May 2021 12:11:14 -0700 Subject: [PATCH 0401/3804] f2fs: support iflag change given the mask In f2fs_fileattr_set(), if (!fa->flags_valid) mask &= FS_COMMON_FL; In this case, we can set supported flags by mask only instead of BUG_ON. /* Flags shared betwen flags/xflags */ (FS_SYNC_FL | FS_IMMUTABLE_FL | FS_APPEND_FL | \ FS_NODUMP_FL | FS_NOATIME_FL | FS_DAX_FL | \ FS_PROJINHERIT_FL) Fixes: 9b1bb01c8ae7 ("f2fs: convert to fileattr") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 44a4650aea7b7..ceb575f99048c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1817,7 +1817,8 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) struct f2fs_inode_info *fi = F2FS_I(inode); u32 masked_flags = fi->i_flags & mask; - f2fs_bug_on(F2FS_I_SB(inode), (iflags & ~mask)); + /* mask can be shrunk by flags_valid selector */ + iflags &= mask; /* Is it quota file? Do not allow user to mess with it */ if (IS_NOQUOTA(inode)) -- GitLab From a12cc5b423d4f36dc1a1ea3911e49cf9dff43898 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 6 May 2021 17:00:43 +0800 Subject: [PATCH 0402/3804] f2fs: compress: fix to free compress page correctly In error path of f2fs_write_compressed_pages(), it needs to call f2fs_compress_free_page() to release temporary page. Fixes: 5e6bbde95982 ("f2fs: introduce mempool for {,de}compress intermediate page allocation") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 53b13787eb2c8..2acaefa100364 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1372,7 +1372,8 @@ out_destroy_crypt: for (i = 0; i < cc->nr_cpages; i++) { if (!cc->cpages[i]) continue; - f2fs_put_page(cc->cpages[i], 1); + f2fs_compress_free_page(cc->cpages[i]); + cc->cpages[i] = NULL; } out_put_cic: kmem_cache_free(cic_entry_slab, cic); -- GitLab From a949dc5f2c5cfe0c910b664650f45371254c0744 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 10 May 2021 17:30:31 +0800 Subject: [PATCH 0403/3804] f2fs: compress: fix race condition of overwrite vs truncate pos_fsstress testcase complains a panic as belew: ------------[ cut here ]------------ kernel BUG at fs/f2fs/compress.c:1082! invalid opcode: 0000 [#1] SMP PTI CPU: 4 PID: 2753477 Comm: kworker/u16:2 Tainted: G OE 5.12.0-rc1-custom #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 Workqueue: writeback wb_workfn (flush-252:16) RIP: 0010:prepare_compress_overwrite+0x4c0/0x760 [f2fs] Call Trace: f2fs_prepare_compress_overwrite+0x5f/0x80 [f2fs] f2fs_write_cache_pages+0x468/0x8a0 [f2fs] f2fs_write_data_pages+0x2a4/0x2f0 [f2fs] do_writepages+0x38/0xc0 __writeback_single_inode+0x44/0x2a0 writeback_sb_inodes+0x223/0x4d0 __writeback_inodes_wb+0x56/0xf0 wb_writeback+0x1dd/0x290 wb_workfn+0x309/0x500 process_one_work+0x220/0x3c0 worker_thread+0x53/0x420 kthread+0x12f/0x150 ret_from_fork+0x22/0x30 The root cause is truncate() may race with overwrite as below, so that one reference count left in page can not guarantee the page attaching in mapping tree all the time, after truncation, later find_lock_page() may return NULL pointer. - prepare_compress_overwrite - f2fs_pagecache_get_page - unlock_page - f2fs_setattr - truncate_setsize - truncate_inode_page - delete_from_page_cache - find_lock_page Fix this by avoiding referencing updated page. Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 2acaefa100364..79348bc56e351 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -117,19 +117,6 @@ static void f2fs_unlock_rpages(struct compress_ctx *cc, int len) f2fs_drop_rpages(cc, len, true); } -static void f2fs_put_rpages_mapping(struct address_space *mapping, - pgoff_t start, int len) -{ - int i; - - for (i = 0; i < len; i++) { - struct page *page = find_get_page(mapping, start + i); - - put_page(page); - put_page(page); - } -} - static void f2fs_put_rpages_wbc(struct compress_ctx *cc, struct writeback_control *wbc, bool redirty, int unlock) { @@ -1036,7 +1023,7 @@ retry: } if (PageUptodate(page)) - unlock_page(page); + f2fs_put_page(page, 1); else f2fs_compress_ctx_add_page(cc, page); } @@ -1046,32 +1033,34 @@ retry: ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size, &last_block_in_bio, false, true); + f2fs_put_rpages(cc); f2fs_destroy_compress_ctx(cc); if (ret) - goto release_pages; + goto out; if (bio) f2fs_submit_bio(sbi, bio, DATA); ret = f2fs_init_compress_ctx(cc); if (ret) - goto release_pages; + goto out; } for (i = 0; i < cc->cluster_size; i++) { f2fs_bug_on(sbi, cc->rpages[i]); page = find_lock_page(mapping, start_idx + i); - f2fs_bug_on(sbi, !page); + if (!page) { + /* page can be truncated */ + goto release_and_retry; + } f2fs_wait_on_page_writeback(page, DATA, true, true); - f2fs_compress_ctx_add_page(cc, page); - f2fs_put_page(page, 0); if (!PageUptodate(page)) { +release_and_retry: + f2fs_put_rpages(cc); f2fs_unlock_rpages(cc, i + 1); - f2fs_put_rpages_mapping(mapping, start_idx, - cc->cluster_size); f2fs_destroy_compress_ctx(cc); goto retry; } @@ -1103,10 +1092,10 @@ retry: } unlock_pages: + f2fs_put_rpages(cc); f2fs_unlock_rpages(cc, i); -release_pages: - f2fs_put_rpages_mapping(mapping, start_idx, i); f2fs_destroy_compress_ctx(cc); +out: return ret; } -- GitLab From 8bfbfb0ddd706b1ce2e89259ecc45f192c0ec2bf Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 10 May 2021 17:30:32 +0800 Subject: [PATCH 0404/3804] f2fs: compress: fix to assign cc.cluster_idx correctly In f2fs_destroy_compress_ctx(), after f2fs_destroy_compress_ctx(), cc.cluster_idx will be cleared w/ NULL_CLUSTER, f2fs_cluster_blocks() may check wrong cluster metadata, fix it. Fixes: 4c8ff7095bef ("f2fs: support data compression") Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/compress.c | 17 +++++++++-------- fs/f2fs/data.c | 6 +++--- fs/f2fs/f2fs.h | 2 +- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 79348bc56e351..925a5ca3744a9 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -145,13 +145,14 @@ int f2fs_init_compress_ctx(struct compress_ctx *cc) return cc->rpages ? 0 : -ENOMEM; } -void f2fs_destroy_compress_ctx(struct compress_ctx *cc) +void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse) { page_array_free(cc->inode, cc->rpages, cc->cluster_size); cc->rpages = NULL; cc->nr_rpages = 0; cc->nr_cpages = 0; - cc->cluster_idx = NULL_CLUSTER; + if (!reuse) + cc->cluster_idx = NULL_CLUSTER; } void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page) @@ -1034,7 +1035,7 @@ retry: ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size, &last_block_in_bio, false, true); f2fs_put_rpages(cc); - f2fs_destroy_compress_ctx(cc); + f2fs_destroy_compress_ctx(cc, true); if (ret) goto out; if (bio) @@ -1061,7 +1062,7 @@ retry: release_and_retry: f2fs_put_rpages(cc); f2fs_unlock_rpages(cc, i + 1); - f2fs_destroy_compress_ctx(cc); + f2fs_destroy_compress_ctx(cc, true); goto retry; } } @@ -1094,7 +1095,7 @@ release_and_retry: unlock_pages: f2fs_put_rpages(cc); f2fs_unlock_rpages(cc, i); - f2fs_destroy_compress_ctx(cc); + f2fs_destroy_compress_ctx(cc, true); out: return ret; } @@ -1130,7 +1131,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata, set_cluster_dirty(&cc); f2fs_put_rpages_wbc(&cc, NULL, false, 1); - f2fs_destroy_compress_ctx(&cc); + f2fs_destroy_compress_ctx(&cc, false); return first_index; } @@ -1350,7 +1351,7 @@ unlock_continue: f2fs_put_rpages(cc); page_array_free(cc->inode, cc->cpages, cc->nr_cpages); cc->cpages = NULL; - f2fs_destroy_compress_ctx(cc); + f2fs_destroy_compress_ctx(cc, false); return 0; out_destroy_crypt: @@ -1512,7 +1513,7 @@ write: err = f2fs_write_raw_pages(cc, submitted, wbc, io_type); f2fs_put_rpages_wbc(cc, wbc, false, 0); destroy_out: - f2fs_destroy_compress_ctx(cc); + f2fs_destroy_compress_ctx(cc, false); return err; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 96f1a354f89fd..33e56ae84e358 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2287,7 +2287,7 @@ static int f2fs_mpage_readpages(struct inode *inode, max_nr_pages, &last_block_in_bio, rac != NULL, false); - f2fs_destroy_compress_ctx(&cc); + f2fs_destroy_compress_ctx(&cc, false); if (ret) goto set_error_page; } @@ -2332,7 +2332,7 @@ next_page: max_nr_pages, &last_block_in_bio, rac != NULL, false); - f2fs_destroy_compress_ctx(&cc); + f2fs_destroy_compress_ctx(&cc, false); } } #endif @@ -3033,7 +3033,7 @@ next: } } if (f2fs_compressed_file(inode)) - f2fs_destroy_compress_ctx(&cc); + f2fs_destroy_compress_ctx(&cc, false); #endif if (retry) { index = 0; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 044878866ca34..c83d90125ebd9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3956,7 +3956,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc); void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed); void f2fs_put_page_dic(struct page *page); int f2fs_init_compress_ctx(struct compress_ctx *cc); -void f2fs_destroy_compress_ctx(struct compress_ctx *cc); +void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse); void f2fs_init_compress_info(struct f2fs_sb_info *sbi); int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi); void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi); -- GitLab From 576f9eacc680d2b1f37e8010cff62f7b227ea769 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 10 May 2021 14:55:09 +0800 Subject: [PATCH 0405/3804] net: stmmac: Fix MAC WoL not working if PHY does not support WoL Both get and set WoL will check device_can_wakeup(), if MAC supports PMT, it will set device wakeup capability. After commit 1d8e5b0f3f2c ("net: stmmac: Support WOL with phy"), device wakeup capability will be overwrite in stmmac_init_phy() according to phy's Wol feature. If phy doesn't support WoL, then MAC will lose wakeup capability. To fix this issue, only overwrite device wakeup capability when MAC doesn't support PMT. For STMMAC now driver checks MAC's WoL capability if MAC supports PMT, if not support, driver will check PHY's WoL capability. Fixes: 1d8e5b0f3f2c ("net: stmmac: Support WOL with phy") Reviewed-by: Jisheng Zhang Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 345b4c6d1fd40..fea3bf07ae892 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1196,7 +1196,6 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv) */ static int stmmac_init_phy(struct net_device *dev) { - struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; struct stmmac_priv *priv = netdev_priv(dev); struct device_node *node; int ret; @@ -1222,8 +1221,12 @@ static int stmmac_init_phy(struct net_device *dev) ret = phylink_connect_phy(priv->phylink, phydev); } - phylink_ethtool_get_wol(priv->phylink, &wol); - device_set_wakeup_capable(priv->device, !!wol.supported); + if (!priv->plat->pmt) { + struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; + + phylink_ethtool_get_wol(priv->phylink, &wol); + device_set_wakeup_capable(priv->device, !!wol.supported); + } return ret; } -- GitLab From 29249eac5225429b898f278230a6ca2baa1ae154 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 11 May 2021 19:13:51 +0200 Subject: [PATCH 0406/3804] mptcp: fix data stream corruption Maxim reported several issues when forcing a TCP transparent proxy to use the MPTCP protocol for the inbound connections. He also provided a clean reproducer. The problem boils down to 'mptcp_frag_can_collapse_to()' assuming that only MPTCP will use the given page_frag. If others - e.g. the plain TCP protocol - allocate page fragments, we can end-up re-using already allocated memory for mptcp_data_frag. Fix the issue ensuring that the to-be-expanded data fragment is located at the current page frag end. v1 -> v2: - added missing fixes tag (Mat) Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/178 Reported-and-tested-by: Maxim Galaganov Fixes: 18b683bff89d ("mptcp: queue data for mptcp level retransmission") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 29a2d690d8d59..2d21a4793d9d0 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -879,12 +879,18 @@ static bool mptcp_skb_can_collapse_to(u64 write_seq, !mpext->frozen; } +/* we can append data to the given data frag if: + * - there is space available in the backing page_frag + * - the data frag tail matches the current page_frag free offset + * - the data frag end sequence number matches the current write seq + */ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk, const struct page_frag *pfrag, const struct mptcp_data_frag *df) { return df && pfrag->page == df->page && pfrag->size - pfrag->offset > 0 && + pfrag->offset == (df->offset + df->data_len) && df->data_seq + df->data_len == msk->write_seq; } -- GitLab From bcbda3fc616272686208f9c4d5f6dccb65360bd8 Mon Sep 17 00:00:00 2001 From: Shannon Nelson Date: Tue, 11 May 2021 11:11:32 -0700 Subject: [PATCH 0407/3804] ionic: fix ptp support config breakage When IONIC=y and PTP_1588_CLOCK=m were set in the .config file the driver link failed with undefined references. We add the dependancy depends on PTP_1588_CLOCK || !PTP_1588_CLOCK to clear this up. If PTP_1588_CLOCK=m, the depends limits IONIC to =m (or disabled). If PTP_1588_CLOCK is disabled, IONIC can be any of y/m/n. Fixes: 61db421da31b ("ionic: link in the new hw timestamp code") Reported-by: kernel test robot Cc: Jakub Kicinski Cc: Randy Dunlap Cc: Allen Hubbe Signed-off-by: Shannon Nelson Acked-by: Randy Dunlap Signed-off-by: David S. Miller --- drivers/net/ethernet/pensando/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/pensando/Kconfig b/drivers/net/ethernet/pensando/Kconfig index 5f8b0bb3af6e3..202973a82712e 100644 --- a/drivers/net/ethernet/pensando/Kconfig +++ b/drivers/net/ethernet/pensando/Kconfig @@ -20,6 +20,7 @@ if NET_VENDOR_PENSANDO config IONIC tristate "Pensando Ethernet IONIC Support" depends on 64BIT && PCI + depends on PTP_1588_CLOCK || !PTP_1588_CLOCK select NET_DEVLINK select DIMLIB help -- GitLab From 440c3247cba3d9433ac435d371dd7927d68772a7 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 11 May 2021 14:42:04 -0500 Subject: [PATCH 0408/3804] net: ipa: memory region array is variable size IPA configuration data includes an array of memory region descriptors. That was a fixed-size array at one time, but at some point we started defining it such that it was only as big as required for a given platform. The actual number of entries in the array is recorded in the configuration data along with the array. A loop in ipa_mem_config() still assumes the array has entries for all defined memory region IDs. As a result, this loop can go past the end of the actual array and attempt to write "canary" values based on nonsensical data. Fix this, by stashing the number of entries in the array, and using that rather than IPA_MEM_COUNT in the initialization loop found in ipa_mem_config(). The only remaining use of IPA_MEM_COUNT is in a validation check to ensure configuration data doesn't have too many entries. That's fine for now. Fixes: 3128aae8c439a ("net: ipa: redefine struct ipa_mem_data") Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- drivers/net/ipa/ipa.h | 2 ++ drivers/net/ipa/ipa_mem.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h index e7ff376cb5b7d..744406832a774 100644 --- a/drivers/net/ipa/ipa.h +++ b/drivers/net/ipa/ipa.h @@ -58,6 +58,7 @@ enum ipa_flag { * @mem_virt: Virtual address of IPA-local memory space * @mem_offset: Offset from @mem_virt used for access to IPA memory * @mem_size: Total size (bytes) of memory at @mem_virt + * @mem_count: Number of entries in the mem array * @mem: Array of IPA-local memory region descriptors * @imem_iova: I/O virtual address of IPA region in IMEM * @imem_size: Size of IMEM region @@ -103,6 +104,7 @@ struct ipa { void *mem_virt; u32 mem_offset; u32 mem_size; + u32 mem_count; const struct ipa_mem *mem; unsigned long imem_iova; diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c index c5c3b1b7e67d5..1624125e7459f 100644 --- a/drivers/net/ipa/ipa_mem.c +++ b/drivers/net/ipa/ipa_mem.c @@ -180,7 +180,7 @@ int ipa_mem_config(struct ipa *ipa) * for the region, write "canary" values in the space prior to * the region's base address. */ - for (mem_id = 0; mem_id < IPA_MEM_COUNT; mem_id++) { + for (mem_id = 0; mem_id < ipa->mem_count; mem_id++) { const struct ipa_mem *mem = &ipa->mem[mem_id]; u16 canary_count; __le32 *canary; @@ -487,6 +487,7 @@ int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data) ipa->mem_size = resource_size(res); /* The ipa->mem[] array is indexed by enum ipa_mem_id values */ + ipa->mem_count = mem_data->local_count; ipa->mem = mem_data->local; ret = ipa_imem_init(ipa, mem_data->imem_addr, mem_data->imem_size); -- GitLab From a78339698ab1f43435fbe67fcd6de8f4f6eb9eec Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 May 2021 14:49:45 +0000 Subject: [PATCH 0409/3804] powerpc/interrupts: Fix kuep_unlock() call Same as kuap_user_restore(), kuep_unlock() has to be called when really returning to user, that is in interrupt_exit_user_prepare(), not in interrupt_exit_prepare(). Fixes: b5efec00b671 ("powerpc/32s: Move KUEP locking/unlocking in C") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/b831e54a2579db24fbef836ed415588ce2b3e825.1620312573.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/interrupt.h | 2 -- arch/powerpc/kernel/interrupt.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 44cde2e129b88..c77e8f57ff062 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -153,8 +153,6 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup */ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) { - if (user_mode(regs)) - kuep_unlock(); } static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index e4559f8914eb7..ed6cebcb78475 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -427,6 +427,7 @@ again: /* Restore user access locks last */ kuap_user_restore(regs); + kuep_unlock(); return ret; } -- GitLab From 5d510ed78bcfcbbd3b3891cbe79cd7543bce1d05 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 6 May 2021 11:56:31 +0000 Subject: [PATCH 0410/3804] powerpc/syscall: Calling kuap_save_and_lock() is wrong kuap_save_and_lock() is only for interrupts inside kernel. system call are only from user, calling kuap_save_and_lock() is wrong. Fixes: c16728835eec ("powerpc/32: Manage KUAP in C") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/332773775cf24a422105dee2d383fb8f04589045.1620302182.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/interrupt.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index ed6cebcb78475..e0938ba298f2a 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -34,9 +34,6 @@ notrace long system_call_exception(long r3, long r4, long r5, syscall_fn f; kuep_lock(); -#ifdef CONFIG_PPC32 - kuap_save_and_lock(regs); -#endif regs->orig_gpr3 = r3; -- GitLab From 2c8c89b95831f46a2fb31a8d0fef4601694023ce Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 8 May 2021 20:14:52 +1000 Subject: [PATCH 0411/3804] powerpc/pseries: Fix hcall tracing recursion in pv queued spinlocks The paravit queued spinlock slow path adds itself to the queue then calls pv_wait to wait for the lock to become free. This is implemented by calling H_CONFER to donate cycles. When hcall tracing is enabled, this H_CONFER call can lead to a spin lock being taken in the tracing code, which will result in the lock to be taken again, which will also go to the slow path because it queues behind itself and so won't ever make progress. An example trace of a deadlock: __pv_queued_spin_lock_slowpath trace_clock_global ring_buffer_lock_reserve trace_event_buffer_lock_reserve trace_event_buffer_reserve trace_event_raw_event_hcall_exit __trace_hcall_exit plpar_hcall_norets_trace __pv_queued_spin_lock_slowpath trace_clock_global ring_buffer_lock_reserve trace_event_buffer_lock_reserve trace_event_buffer_reserve trace_event_raw_event_rcu_dyntick rcu_irq_exit irq_exit __do_irq call_do_irq do_IRQ hardware_interrupt_common_virt Fix this by introducing plpar_hcall_norets_notrace(), and using that to make SPLPAR virtual processor dispatching hcalls by the paravirt spinlock code. Signed-off-by: Nicholas Piggin Reviewed-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210508101455.1578318-2-npiggin@gmail.com --- arch/powerpc/include/asm/hvcall.h | 3 +++ arch/powerpc/include/asm/paravirt.h | 22 +++++++++++++++++++--- arch/powerpc/platforms/pseries/hvCall.S | 10 ++++++++++ arch/powerpc/platforms/pseries/lpar.c | 3 +-- 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 4430509060185..e3b29eda8074c 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -448,6 +448,9 @@ */ long plpar_hcall_norets(unsigned long opcode, ...); +/* Variant which does not do hcall tracing */ +long plpar_hcall_norets_notrace(unsigned long opcode, ...); + /** * plpar_hcall: - Make a pseries hypervisor call * @opcode: The hypervisor call to make. diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index 5d1726bb28e79..bcb7b5f917be6 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -28,19 +28,35 @@ static inline u32 yield_count_of(int cpu) return be32_to_cpu(yield_count); } +/* + * Spinlock code confers and prods, so don't trace the hcalls because the + * tracing code takes spinlocks which can cause recursion deadlocks. + * + * These calls are made while the lock is not held: the lock slowpath yields if + * it can not acquire the lock, and unlock slow path might prod if a waiter has + * yielded). So this may not be a problem for simple spin locks because the + * tracing does not technically recurse on the lock, but we avoid it anyway. + * + * However the queued spin lock contended path is more strictly ordered: the + * H_CONFER hcall is made after the task has queued itself on the lock, so then + * recursing on that lock will cause the task to then queue up again behind the + * first instance (or worse: queued spinlocks use tricks that assume a context + * never waits on more than one spinlock, so such recursion may cause random + * corruption in the lock code). + */ static inline void yield_to_preempted(int cpu, u32 yield_count) { - plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count); + plpar_hcall_norets_notrace(H_CONFER, get_hard_smp_processor_id(cpu), yield_count); } static inline void prod_cpu(int cpu) { - plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu)); + plpar_hcall_norets_notrace(H_PROD, get_hard_smp_processor_id(cpu)); } static inline void yield_to_any(void) { - plpar_hcall_norets(H_CONFER, -1, 0); + plpar_hcall_norets_notrace(H_CONFER, -1, 0); } #else static inline bool is_shared_processor(void) diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 2136e42833af3..8a2b8d64265bc 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -102,6 +102,16 @@ END_FTR_SECTION(0, 1); \ #define HCALL_BRANCH(LABEL) #endif +_GLOBAL_TOC(plpar_hcall_norets_notrace) + HMT_MEDIUM + + mfcr r0 + stw r0,8(r1) + HVSC /* invoke the hypervisor */ + lwz r0,8(r1) + mtcrf 0xff,r0 + blr /* return r3 = status */ + _GLOBAL_TOC(plpar_hcall_norets) HMT_MEDIUM diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 1f3152ad72132..b619568a4d04a 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1830,8 +1830,7 @@ void hcall_tracepoint_unregfunc(void) /* * Since the tracing code might execute hcalls we need to guard against - * recursion. One example of this are spinlocks calling H_YIELD on - * shared processor partitions. + * recursion. */ static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); -- GitLab From a3f1a39a5643d5c5ed3eee4edd933e0ebfeeed6e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 8 May 2021 20:14:53 +1000 Subject: [PATCH 0412/3804] powerpc/pseries: Don't trace hcall tracing wrapper This doesn't seem very useful to trace before the recursion check, even if the ftrace code has any recursion checks of its own. Be on the safe side and don't trace the hcall trace wrappers. Reported-by: Naveen N. Rao Signed-off-by: Nicholas Piggin Reviewed-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210508101455.1578318-3-npiggin@gmail.com --- arch/powerpc/platforms/pseries/lpar.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index b619568a4d04a..d79d7410c3204 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1835,7 +1835,7 @@ void hcall_tracepoint_unregfunc(void) static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); -void __trace_hcall_entry(unsigned long opcode, unsigned long *args) +notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args) { unsigned long flags; unsigned int *depth; @@ -1863,7 +1863,7 @@ out: local_irq_restore(flags); } -void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf) +notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf) { unsigned long flags; unsigned int *depth; -- GitLab From 7058f4b13edd9dd2cb3c5b4fe340d8307dbe0208 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 8 May 2021 20:14:54 +1000 Subject: [PATCH 0413/3804] powerpc/pseries: use notrace hcall variant for H_CEDE idle Rather than special-case H_CEDE in the hcall trace wrappers, make the idle H_CEDE call use plpar_hcall_norets_notrace(). Signed-off-by: Nicholas Piggin Reviewed-by: Naveen N. Rao Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210508101455.1578318-4-npiggin@gmail.com --- arch/powerpc/include/asm/plpar_wrappers.h | 6 +++++- arch/powerpc/platforms/pseries/lpar.c | 10 ---------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index ece84a430701f..83e0f701ebc67 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -28,7 +28,11 @@ static inline void set_cede_latency_hint(u8 latency_hint) static inline long cede_processor(void) { - return plpar_hcall_norets(H_CEDE); + /* + * We cannot call tracepoints inside RCU idle regions which + * means we must not trace H_CEDE. + */ + return plpar_hcall_norets_notrace(H_CEDE); } static inline long extended_cede_processor(unsigned long latency_hint) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index d79d7410c3204..ad1cec80019bb 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1840,13 +1840,6 @@ notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args) unsigned long flags; unsigned int *depth; - /* - * We cannot call tracepoints inside RCU idle regions which - * means we must not trace H_CEDE. - */ - if (opcode == H_CEDE) - return; - local_irq_save(flags); depth = this_cpu_ptr(&hcall_trace_depth); @@ -1868,9 +1861,6 @@ notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf) unsigned long flags; unsigned int *depth; - if (opcode == H_CEDE) - return; - local_irq_save(flags); depth = this_cpu_ptr(&hcall_trace_depth); -- GitLab From 4f242fc5f2e24412b89e934dad025b10293b2712 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 8 May 2021 20:14:55 +1000 Subject: [PATCH 0414/3804] powerpc/pseries: warn if recursing into the hcall tracing code The hcall tracing code has a recursion check built in, which skips tracing if we are already tracing an hcall. However if the tracing code has problems with recursion, this check may not catch all cases because the tracing code could be invoked from a different tracepoint first, then make an hcall that gets traced, then recurse. Add an explicit warning if recursion is detected here, which might help to notice tracing code making hcalls. Really the core trace code should have its own recursion checking and warnings though. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210508101455.1578318-5-npiggin@gmail.com --- arch/powerpc/platforms/pseries/lpar.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index ad1cec80019bb..dab356e3ff87c 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -1829,8 +1829,14 @@ void hcall_tracepoint_unregfunc(void) #endif /* - * Since the tracing code might execute hcalls we need to guard against - * recursion. + * Keep track of hcall tracing depth and prevent recursion. Warn if any is + * detected because it may indicate a problem. This will not catch all + * problems with tracing code making hcalls, because the tracing might have + * been invoked from a non-hcall, so the first hcall could recurse into it + * without warning here, but this better than nothing. + * + * Hcalls with specific problems being traced should use the _notrace + * plpar_hcall variants. */ static DEFINE_PER_CPU(unsigned int, hcall_trace_depth); @@ -1844,7 +1850,7 @@ notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args) depth = this_cpu_ptr(&hcall_trace_depth); - if (*depth) + if (WARN_ON_ONCE(*depth)) goto out; (*depth)++; @@ -1865,7 +1871,7 @@ notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf) depth = this_cpu_ptr(&hcall_trace_depth); - if (*depth) + if (*depth) /* Don't warn again on the way out */ goto out; (*depth)++; -- GitLab From 7315e457d6bc342d06ba0b7ee498221c5237a547 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 8 May 2021 09:25:32 +0000 Subject: [PATCH 0415/3804] powerpc/uaccess: Fix __get_user() with CONFIG_CC_HAS_ASM_GOTO_OUTPUT Building kernel mainline with GCC 11 leads to following failure when starting 'init': init[1]: bad frame in sys_sigreturn: 7ff5a900 nip 001083cc lr 001083c4 Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b This is an issue due to a segfault happening in __unsafe_restore_general_regs() in a loop copying registers from user to kernel: 10: 7d 09 03 a6 mtctr r8 14: 80 ca 00 00 lwz r6,0(r10) 18: 80 ea 00 04 lwz r7,4(r10) 1c: 90 c9 00 08 stw r6,8(r9) 20: 90 e9 00 0c stw r7,12(r9) 24: 39 0a 00 08 addi r8,r10,8 28: 39 29 00 08 addi r9,r9,8 2c: 81 4a 00 08 lwz r10,8(r10) <== r10 is clobbered here 30: 81 6a 00 0c lwz r11,12(r10) 34: 91 49 00 08 stw r10,8(r9) 38: 91 69 00 0c stw r11,12(r9) 3c: 39 48 00 08 addi r10,r8,8 40: 39 29 00 08 addi r9,r9,8 44: 42 00 ff d0 bdnz 14 <__unsafe_restore_general_regs+0x14> As shown above, this is due to r10 being re-used by GCC. This didn't happen with CLANG. This is fixed by tagging 'x' output as an earlyclobber operand in __get_user_asm2_goto(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cf0a050d124d4f426cdc7a74009d17b01d8d8969.1620465917.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index a09e4240c5b16..22c79ab400060 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -157,7 +157,7 @@ do { \ "2: lwz%X1 %L0, %L1\n" \ EX_TABLE(1b, %l2) \ EX_TABLE(2b, %l2) \ - : "=r" (x) \ + : "=&r" (x) \ : "m" (*addr) \ : \ : label) -- GitLab From bc581dbab26edf0b6acc98c76943b4a0c7d672a2 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 8 May 2021 09:25:44 +0000 Subject: [PATCH 0416/3804] powerpc/signal: Fix possible build failure with unsafe_copy_fpr_{to/from}_user When neither CONFIG_VSX nor CONFIG_PPC_FPU_REGS are selected, unsafe_copy_fpr_to_user() and unsafe_copy_fpr_from_user() are doing nothing. Then, unless the 'label' operand is used elsewhere, GCC complains about it being defined but not used. To fix that, add an impossible 'goto label'. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/cadc0a328bc8e6c5bf133193e7547d5c10ae7895.1620465920.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/signal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index f4aafa337c2ed..1f07317964e49 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -166,9 +166,9 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from) } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ #else -#define unsafe_copy_fpr_to_user(to, task, label) do { } while (0) +#define unsafe_copy_fpr_to_user(to, task, label) do { if (0) goto label;} while (0) -#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0) +#define unsafe_copy_fpr_from_user(task, from, label) do { if (0) goto label;} while (0) static inline unsigned long copy_fpr_to_user(void __user *to, struct task_struct *task) -- GitLab From 63970f3c37e75997ed86dbdfdc83df35f2152bb1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 8 May 2021 06:36:21 +0000 Subject: [PATCH 0417/3804] powerpc/legacy_serial: Fix UBSAN: array-index-out-of-bounds UBSAN complains when a pointer is calculated with invalid 'legacy_serial_console' index, allthough the index is verified before dereferencing the pointer. Fix it by checking 'legacy_serial_console' validity before calculating pointers. Fixes: 0bd3f9e953bd ("powerpc/legacy_serial: Use early_ioremap()") Reported-by: Paul Menzel Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210511010712.750096-1-mpe@ellerman.id.au --- arch/powerpc/kernel/legacy_serial.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 8b2c1a8553a0e..cfc03e016ff2d 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -356,13 +356,16 @@ static void __init setup_legacy_serial_console(int console) static int __init ioremap_legacy_serial_console(void) { - struct legacy_serial_info *info = &legacy_serial_infos[legacy_serial_console]; - struct plat_serial8250_port *port = &legacy_serial_ports[legacy_serial_console]; + struct plat_serial8250_port *port; + struct legacy_serial_info *info; void __iomem *vaddr; if (legacy_serial_console < 0) return 0; + info = &legacy_serial_infos[legacy_serial_console]; + port = &legacy_serial_ports[legacy_serial_console]; + if (!info->early_addr) return 0; -- GitLab From da3bb206c9ceb0736d9e2897ea697acabad35833 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 11 May 2021 20:54:59 +1000 Subject: [PATCH 0418/3804] KVM: PPC: Book3S HV: Fix kvm_unmap_gfn_range_hv() for Hash MMU Commit 32b48bf8514c ("KVM: PPC: Book3S HV: Fix conversion to gfn-based MMU notifier callbacks") fixed kvm_unmap_gfn_range_hv() by adding a for loop over each gfn in the range. But for the Hash MMU it repeatedly calls kvm_unmap_rmapp() with the first gfn of the range, rather than iterating through the range. This exhibits as strange guest behaviour, sometimes crashing in firmare, or booting and then guest userspace crashing unexpectedly. Fix it by passing the iterator, gfn, to kvm_unmap_rmapp(). Fixes: 32b48bf8514c ("KVM: PPC: Book3S HV: Fix conversion to gfn-based MMU notifier callbacks") Reviewed-by: Sean Christopherson Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210511105459.800788-1-mpe@ellerman.id.au --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 2d9193cd73be4..c63e263312a4f 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -840,7 +840,7 @@ bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range) kvm_unmap_radix(kvm, range->slot, gfn); } else { for (gfn = range->start; gfn < range->end; gfn++) - kvm_unmap_rmapp(kvm, range->slot, range->start); + kvm_unmap_rmapp(kvm, range->slot, gfn); } return false; -- GitLab From e9f4eee9a0023ba22db9560d4cc6ee63f933dae8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 11 May 2021 21:38:36 -0400 Subject: [PATCH 0419/3804] blk-iocost: fix weight updates of inner active iocgs When the weight of an active iocg is updated, weight_updated() is called which in turn calls __propagate_weights() to update the active and inuse weights so that the effective hierarchical weights are update accordingly. The current implementation is incorrect for inner active nodes. For an active leaf iocg, inuse can be any value between 1 and active and the difference represents how much the iocg is donating. When weight is updated, as long as inuse is clamped between 1 and the new weight, we're alright and this is what __propagate_weights() currently implements. However, that's not how an active inner node's inuse is set. An inner node's inuse is solely determined by the ratio between the sums of inuse's and active's of its children - ie. they're results of propagating the leaves' active and inuse weights upwards. __propagate_weights() incorrectly applies the same clamping as for a leaf when an active inner node's weight is updated. Consider a hierarchy which looks like the following with saturating workloads in AA and BB. R / \ A B | | AA BB 1. For both A and B, active=100, inuse=100, hwa=0.5, hwi=0.5. 2. echo 200 > A/io.weight 3. __propagate_weights() update A's active to 200 and leave inuse at 100 as it's already between 1 and the new active, making A:active=200, A:inuse=100. As R's active_sum is updated along with A's active, A:hwa=2/3, B:hwa=1/3. However, because the inuses didn't change, the hwi's remain unchanged at 0.5. 4. The weight of A is now twice that of B but AA and BB still have the same hwi of 0.5 and thus are doing the same amount of IOs. Fix it by making __propgate_weights() always calculate the inuse of an active inner iocg based on the ratio of child_inuse_sum to child_active_sum. Signed-off-by: Tejun Heo Reported-by: Dan Schatzberg Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost") Cc: stable@vger.kernel.org # v5.4+ Link: https://lore.kernel.org/r/YJsxnLZV1MnBcqjj@slm.duckdns.org Signed-off-by: Jens Axboe --- block/blk-iocost.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index e0c4baa018578..c2d6bc88d3f15 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -1069,7 +1069,17 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse, lockdep_assert_held(&ioc->lock); - inuse = clamp_t(u32, inuse, 1, active); + /* + * For an active leaf node, its inuse shouldn't be zero or exceed + * @active. An active internal node's inuse is solely determined by the + * inuse to active ratio of its children regardless of @inuse. + */ + if (list_empty(&iocg->active_list) && iocg->child_active_sum) { + inuse = DIV64_U64_ROUND_UP(active * iocg->child_inuse_sum, + iocg->child_active_sum); + } else { + inuse = clamp_t(u32, inuse, 1, active); + } iocg->last_inuse = iocg->inuse; if (save) @@ -1086,7 +1096,7 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse, /* update the level sums */ parent->child_active_sum += (s32)(active - child->active); parent->child_inuse_sum += (s32)(inuse - child->inuse); - /* apply the udpates */ + /* apply the updates */ child->active = active; child->inuse = inuse; -- GitLab From ca298241bc229303ff683db7265a2c625a9c00fe Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 11 May 2021 14:38:47 -0700 Subject: [PATCH 0420/3804] f2fs: avoid swapon failure by giving a warning first The final solution can be migrating blocks to form a section-aligned file internally. Meanwhile, let's ask users to do that when preparing the swap file initially like: 1) create() 2) ioctl(F2FS_IOC_SET_PIN_FILE) 3) fallocate() Reported-by: kernel test robot Fixes: 36e4d95891ed ("f2fs: check if swapfile is section-alligned") Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 33e56ae84e358..41e260680b27c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3801,6 +3801,7 @@ static int f2fs_is_file_aligned(struct inode *inode) block_t pblock; unsigned long nr_pblocks; unsigned int blocks_per_sec = BLKS_PER_SEC(sbi); + unsigned int not_aligned = 0; int ret = 0; cur_lblock = 0; @@ -3833,13 +3834,20 @@ static int f2fs_is_file_aligned(struct inode *inode) if ((pblock - main_blkaddr) & (blocks_per_sec - 1) || nr_pblocks & (blocks_per_sec - 1)) { - f2fs_err(sbi, "Swapfile does not align to section"); - ret = -EINVAL; - goto out; + if (f2fs_is_pinned_file(inode)) { + f2fs_err(sbi, "Swapfile does not align to section"); + ret = -EINVAL; + goto out; + } + not_aligned++; } cur_lblock += nr_pblocks; } + if (not_aligned) + f2fs_warn(sbi, "Swapfile (%u) is not align to section: \n" + "\t1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()", + not_aligned); out: return ret; } @@ -3858,6 +3866,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, int nr_extents = 0; unsigned long nr_pblocks; unsigned int blocks_per_sec = BLKS_PER_SEC(sbi); + unsigned int not_aligned = 0; int ret = 0; /* @@ -3896,9 +3905,12 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, if ((pblock - SM_I(sbi)->main_blkaddr) & (blocks_per_sec - 1) || nr_pblocks & (blocks_per_sec - 1)) { - f2fs_err(sbi, "Swapfile does not align to section"); - ret = -EINVAL; - goto out; + if (f2fs_is_pinned_file(inode)) { + f2fs_err(sbi, "Swapfile does not align to section"); + ret = -EINVAL; + goto out; + } + not_aligned++; } if (cur_lblock + nr_pblocks >= sis->max) @@ -3927,6 +3939,11 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, sis->max = cur_lblock; sis->pages = cur_lblock - 1; sis->highest_bit = cur_lblock - 1; + + if (not_aligned) + f2fs_warn(sbi, "Swapfile (%u) is not align to section: \n" + "\t1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()", + not_aligned); out: return ret; } -- GitLab From 2b17c400aeb44daf041627722581ade527bb3c1d Mon Sep 17 00:00:00 2001 From: Norbert Slusarek Date: Wed, 12 May 2021 00:43:54 +0200 Subject: [PATCH 0421/3804] can: isotp: prevent race between isotp_bind() and isotp_setsockopt() A race condition was found in isotp_setsockopt() which allows to change socket options after the socket was bound. For the specific case of SF_BROADCAST support, this might lead to possible use-after-free because can_rx_unregister() is not called. Checking for the flag under the socket lock in isotp_bind() and taking the lock in isotp_setsockopt() fixes the issue. Fixes: 921ca574cd38 ("can: isotp: add SF_BROADCAST support for functional addressing") Link: https://lore.kernel.org/r/trinity-e6ae9efa-9afb-4326-84c0-f3609b9b8168-1620773528307@3c-app-gmx-bs06 Reported-by: Norbert Slusarek Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Norbert Slusarek Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- net/can/isotp.c | 49 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/net/can/isotp.c b/net/can/isotp.c index 9f94ad3caee92..253b24417c8e5 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1062,27 +1062,31 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) if (len < ISOTP_MIN_NAMELEN) return -EINVAL; + if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) + return -EADDRNOTAVAIL; + + if (!addr->can_ifindex) + return -ENODEV; + + lock_sock(sk); + /* do not register frame reception for functional addressing */ if (so->opt.flags & CAN_ISOTP_SF_BROADCAST) do_rx_reg = 0; /* do not validate rx address for functional addressing */ if (do_rx_reg) { - if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) - return -EADDRNOTAVAIL; + if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) { + err = -EADDRNOTAVAIL; + goto out; + } - if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) - return -EADDRNOTAVAIL; + if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) { + err = -EADDRNOTAVAIL; + goto out; + } } - if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) - return -EADDRNOTAVAIL; - - if (!addr->can_ifindex) - return -ENODEV; - - lock_sock(sk); - if (so->bound && addr->can_ifindex == so->ifindex && addr->can_addr.tp.rx_id == so->rxid && addr->can_addr.tp.tx_id == so->txid) @@ -1164,16 +1168,13 @@ static int isotp_getname(struct socket *sock, struct sockaddr *uaddr, int peer) return ISOTP_MIN_NAMELEN; } -static int isotp_setsockopt(struct socket *sock, int level, int optname, +static int isotp_setsockopt_locked(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct isotp_sock *so = isotp_sk(sk); int ret = 0; - if (level != SOL_CAN_ISOTP) - return -EINVAL; - if (so->bound) return -EISCONN; @@ -1248,6 +1249,22 @@ static int isotp_setsockopt(struct socket *sock, int level, int optname, return ret; } +static int isotp_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen) + +{ + struct sock *sk = sock->sk; + int ret; + + if (level != SOL_CAN_ISOTP) + return -EINVAL; + + lock_sock(sk); + ret = isotp_setsockopt_locked(sock, level, optname, optval, optlen); + release_sock(sk); + return ret; +} + static int isotp_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { -- GitLab From 02dbb7246c5bbbbe1607ebdc546ba5c454a664b1 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Tue, 11 May 2021 20:46:09 +0530 Subject: [PATCH 0422/3804] sched/fair: Fix clearing of has_idle_cores flag in select_idle_cpu() In commit: 9fe1f127b913 ("sched/fair: Merge select_idle_core/cpu()") in select_idle_cpu(), we check if an idle core is present in the LLC of the target CPU via the flag "has_idle_cores". We look for the idle core in select_idle_cores(). If select_idle_cores() isn't able to find an idle core/CPU, we need to unset the has_idle_cores flag in the LLC of the target to prevent other CPUs from going down this route. However, the current code is unsetting it in the LLC of the current CPU instead of the target CPU. This patch fixes this issue. Fixes: 9fe1f127b913 ("sched/fair: Merge select_idle_core/cpu()") Signed-off-by: Gautham R. Shenoy Signed-off-by: Ingo Molnar Reviewed-by: Vincent Guittot Reviewed-by: Srikar Dronamraju Acked-by: Mel Gorman Link: https://lore.kernel.org/r/1620746169-13996-1-git-send-email-ego@linux.vnet.ibm.com --- kernel/sched/fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 20aa234ffe04c..3248e24a90b0f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6217,7 +6217,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool } if (has_idle_core) - set_idle_cores(this, false); + set_idle_cores(target, false); if (sched_feat(SIS_PROP) && !has_idle_core) { time = cpu_clock(this) - time; -- GitLab From 440e906702410f59ae5397ec9e3b639edb53f80e Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 10 May 2021 15:48:49 -0700 Subject: [PATCH 0423/3804] perf/x86/intel/uncore: Drop unnecessary NULL checks after container_of() The parameter passed to the pmu_enable() and pmu_disable() functions can not be NULL because it is dereferenced by the caller. That means the result of container_of() on that parameter can also never be NULL. The existing NULL checks are therefore unnecessary and misleading. Remove them. This change was made automatically with the following Coccinelle script. @@ type t; identifier v; statement s; @@ <+... ( t v = container_of(...); | v = container_of(...); ) ... when != v - if (\( !v \| v == NULL \) ) s ...+> Signed-off-by: Guenter Roeck Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210510224849.2349861-1-linux@roeck-us.net --- arch/x86/events/intel/uncore.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index df7b07d7fdcb5..9bf4dbbc26e22 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -801,8 +801,6 @@ static void uncore_pmu_enable(struct pmu *pmu) struct intel_uncore_box *box; uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); - if (!uncore_pmu) - return; box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); if (!box) @@ -818,8 +816,6 @@ static void uncore_pmu_disable(struct pmu *pmu) struct intel_uncore_box *box; uncore_pmu = container_of(pmu, struct intel_uncore_pmu, pmu); - if (!uncore_pmu) - return; box = uncore_pmu_to_box(uncore_pmu, smp_processor_id()); if (!box) -- GitLab From 6627eb25e40cc8d135d3f8d5391851d18ac497d7 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Mon, 10 May 2021 11:53:10 -0700 Subject: [PATCH 0424/3804] x86/entry: Unify definitions from and The register offsets in are duplicated in entry/calling.h, but are formatted differently and therefore not compatible. Use the version from consistently. Signed-off-by: H. Peter Anvin (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210510185316.3307264-2-hpa@zytor.com --- arch/x86/entry/calling.h | 36 +----------------------------------- arch/x86/kernel/head_64.S | 6 +++--- 2 files changed, 4 insertions(+), 38 deletions(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 07a9331d55e73..7436d4a74ecbf 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -6,6 +6,7 @@ #include #include #include +#include /* @@ -62,41 +63,6 @@ For 32-bit we have the following conventions - kernel is built with * for assembly code: */ -/* The layout forms the "struct pt_regs" on the stack: */ -/* - * C ABI says these regs are callee-preserved. They aren't saved on kernel entry - * unless syscall needs a complete, fully filled "struct pt_regs". - */ -#define R15 0*8 -#define R14 1*8 -#define R13 2*8 -#define R12 3*8 -#define RBP 4*8 -#define RBX 5*8 -/* These regs are callee-clobbered. Always saved on kernel entry. */ -#define R11 6*8 -#define R10 7*8 -#define R9 8*8 -#define R8 9*8 -#define RAX 10*8 -#define RCX 11*8 -#define RDX 12*8 -#define RSI 13*8 -#define RDI 14*8 -/* - * On syscall entry, this is syscall#. On CPU exception, this is error code. - * On hw interrupt, it's IRQ number: - */ -#define ORIG_RAX 15*8 -/* Return frame for iretq */ -#define RIP 16*8 -#define CS 17*8 -#define EFLAGS 18*8 -#define RSP 19*8 -#define SS 20*8 - -#define SIZEOF_PTREGS 21*8 - .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 .if \save_ret pushq %rsi /* pt_regs->si */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 04bddaaba8e25..d8b3ebd2bb85f 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -62,7 +62,7 @@ SYM_CODE_START_NOALIGN(startup_64) */ /* Set up the stack for verify_cpu(), similar to initial_stack below */ - leaq (__end_init_task - SIZEOF_PTREGS)(%rip), %rsp + leaq (__end_init_task - FRAME_SIZE)(%rip), %rsp leaq _text(%rip), %rdi pushq %rsi @@ -343,10 +343,10 @@ SYM_DATA(initial_vc_handler, .quad handle_vc_boot_ghcb) #endif /* - * The SIZEOF_PTREGS gap is a convention which helps the in-kernel unwinder + * The FRAME_SIZE gap is a convention which helps the in-kernel unwinder * reliably detect the end of the stack. */ -SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS) +SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - FRAME_SIZE) __FINITDATA __INIT -- GitLab From 3e5e7f7736b05d5fdf2cc4e0ba4f2d8bc42c630d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Mon, 10 May 2021 11:53:11 -0700 Subject: [PATCH 0425/3804] x86/entry: Reverse arguments to do_syscall_64() Reverse the order of arguments to do_syscall_64() so that the first argument is the pt_regs pointer. This is not only consistent with *all* other entry points from assembly, but it actually makes the compiled code slightly better. Signed-off-by: H. Peter Anvin (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210510185316.3307264-3-hpa@zytor.com --- arch/x86/entry/common.c | 2 +- arch/x86/entry/entry_64.S | 4 ++-- arch/x86/include/asm/syscall.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 7b2542b13ebd9..00da0f5420de8 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -36,7 +36,7 @@ #include #ifdef CONFIG_X86_64 -__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs) +__visible noinstr void do_syscall_64(struct pt_regs *regs, unsigned long nr) { add_random_kstack_offset(); nr = syscall_enter_from_user_mode(regs, nr); diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a16a5294d55f6..1d9db15fdc692 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -107,8 +107,8 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) PUSH_AND_CLEAR_REGS rax=$-ENOSYS /* IRQs are off. */ - movq %rax, %rdi - movq %rsp, %rsi + movq %rsp, %rdi + movq %rax, %rsi call do_syscall_64 /* returns with IRQs disabled */ /* diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 7cbf733d11afd..4e20054d7533e 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -160,7 +160,7 @@ static inline int syscall_get_arch(struct task_struct *task) ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64; } -void do_syscall_64(unsigned long nr, struct pt_regs *regs); +void do_syscall_64(struct pt_regs *regs, unsigned long nr); void do_int80_syscall_32(struct pt_regs *regs); long do_fast_syscall_32(struct pt_regs *regs); -- GitLab From dce0aa3b2ef28900cc4c779c59a870f1b4bdadee Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Mon, 10 May 2021 11:53:12 -0700 Subject: [PATCH 0426/3804] x86/syscall: Unconditionally prototype {ia32,x32}_sys_call_table[] Even if these APIs are disabled, and the arrays therefore do not exist, having the prototypes allows us to use IS_ENABLED() rather than using #ifdefs. If something ends up trying to actually *use* these arrays a linker error will ensue. Signed-off-by: H. Peter Anvin (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210510185316.3307264-4-hpa@zytor.com --- arch/x86/include/asm/syscall.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 4e20054d7533e..f6593cafdbd93 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -21,13 +21,12 @@ extern const sys_call_ptr_t sys_call_table[]; #if defined(CONFIG_X86_32) #define ia32_sys_call_table sys_call_table -#endif - -#if defined(CONFIG_IA32_EMULATION) +#else +/* + * These may not exist, but still put the prototypes in so we + * can use IS_ENABLED(). + */ extern const sys_call_ptr_t ia32_sys_call_table[]; -#endif - -#ifdef CONFIG_X86_X32_ABI extern const sys_call_ptr_t x32_sys_call_table[]; #endif -- GitLab From 6de4ac1d03f75248974a398110b15af0bfe65a11 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Mon, 10 May 2021 11:53:13 -0700 Subject: [PATCH 0427/3804] x86/syscall: Maximize MSR_SYSCALL_MASK It is better to clear as many flags as possible when we do a system call entry, as opposed to the other way around. The fewer flags we keep, the lesser the possible interference between the kernel and user space. The flags changed are: - CF, PF, AF, ZF, SF, OF: these are arithmetic flags which affect branches, possibly speculatively. They should be cleared for the same reasons we now clear all GPRs on entry. - RF: suppresses a code breakpoint on the subsequent instruction. It is probably impossible to enter the kernel with RF set, but if it is somehow not, it would break a kernel debugger setting a breakpoint on the entry point. Either way, user space should not be able to control kernel behavior here. - ID: this flag has no direct effect (it is a scratch bit only.) However, there is no reason to retain the user space value in the kernel, and the standard should be to clear unless needed, not the other way around. Signed-off-by: H. Peter Anvin (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210510185316.3307264-5-hpa@zytor.com --- arch/x86/kernel/cpu/common.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a1b756c49a93a..6cf6975746616 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1773,10 +1773,16 @@ void syscall_init(void) wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL); #endif - /* Flags to clear on syscall */ + /* + * Flags to clear on syscall; clear as much as possible + * to minimize user space-kernel interference. + */ wrmsrl(MSR_SYSCALL_MASK, - X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF| - X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT); + X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF| + X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_TF| + X86_EFLAGS_IF|X86_EFLAGS_DF|X86_EFLAGS_OF| + X86_EFLAGS_IOPL|X86_EFLAGS_NT|X86_EFLAGS_RF| + X86_EFLAGS_AC|X86_EFLAGS_ID); } #else /* CONFIG_X86_64 */ -- GitLab From 29e9758966f47004bd7245e6adadcb708386f36a Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin (Intel)" Date: Mon, 10 May 2021 11:53:14 -0700 Subject: [PATCH 0428/3804] x86/entry: Split PUSH_AND_CLEAR_REGS into two submacros PUSH_AND_CLEAR_REGS, as the name implies, performs two functions: pushing registers and clearing registers. They don't necessarily have to be performed in immediate sequence, although all current users do. Split it into two macros for the case where that isn't desired; the FRED enabling patchset will eventually make use of this. Signed-off-by: H. Peter Anvin (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210510185316.3307264-6-hpa@zytor.com --- arch/x86/entry/calling.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h index 7436d4a74ecbf..a4c061fb7c6ea 100644 --- a/arch/x86/entry/calling.h +++ b/arch/x86/entry/calling.h @@ -63,7 +63,7 @@ For 32-bit we have the following conventions - kernel is built with * for assembly code: */ -.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 +.macro PUSH_REGS rdx=%rdx rax=%rax save_ret=0 .if \save_ret pushq %rsi /* pt_regs->si */ movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ @@ -90,7 +90,9 @@ For 32-bit we have the following conventions - kernel is built with .if \save_ret pushq %rsi /* return address on top of stack */ .endif +.endm +.macro CLEAR_REGS /* * Sanitize registers of values that a speculation attack might * otherwise want to exploit. The lower registers are likely clobbered @@ -112,6 +114,11 @@ For 32-bit we have the following conventions - kernel is built with .endm +.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 + PUSH_REGS rdx=\rdx, rax=\rax, save_ret=\save_ret + CLEAR_REGS +.endm + .macro POP_REGS pop_rdi=1 skip_r11rcx=0 popq %r15 popq %r14 -- GitLab From 9ddcb87b9218dec760e8d8a780bc8ad514c3d36a Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 10 May 2021 11:53:15 -0700 Subject: [PATCH 0429/3804] x86/regs: Syscall_get_nr() returns -1 for a non-system call syscall_get_nr() is defined to return -1 for a non-system call or a ptrace/seccomp restart; not just any arbitrary number. See comment in for the official definition of this function. Signed-off-by: H. Peter Anvin Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210510185316.3307264-7-hpa@zytor.com --- arch/x86/kernel/ptrace.c | 2 +- arch/x86/kernel/signal.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 87a4143aa7d7c..4c208ea3bd9f3 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -911,7 +911,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value) * syscall with TS_COMPAT still set. */ regs->orig_ax = value; - if (syscall_get_nr(child, regs) >= 0) + if (syscall_get_nr(child, regs) != -1) child->thread_info.status |= TS_I386_REGS_POKED; break; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index a06cb107c0e88..e12779a2714dc 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -713,7 +713,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL); /* Are we from a system call? */ - if (syscall_get_nr(current, regs) >= 0) { + if (syscall_get_nr(current, regs) != -1) { /* If so, check system call restarting.. */ switch (syscall_get_error(current, regs)) { case -ERESTART_RESTARTBLOCK: @@ -793,7 +793,7 @@ void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal) } /* Did we come from a system call? */ - if (syscall_get_nr(current, regs) >= 0) { + if (syscall_get_nr(current, regs) != -1) { /* Restart the system call - no handlers present */ switch (syscall_get_error(current, regs)) { case -ERESTARTNOHAND: -- GitLab From 2b8ca1a907d5fffc85fb648bbace28ddf3420825 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 10 May 2021 18:15:22 +0200 Subject: [PATCH 0430/3804] sched/core: Remove the pointless BUG_ON(!task) from wake_up_q() container_of() can never return NULL - so don't check for it pointlessly. [ mingo: Twiddled the changelog. ] Signed-off-by: Oleg Nesterov Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210510161522.GA32644@redhat.com --- kernel/sched/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5226cc26a095f..61d1d85bb93da 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -585,7 +585,6 @@ void wake_up_q(struct wake_q_head *head) struct task_struct *task; task = container_of(node, struct task_struct, wake_q); - BUG_ON(!task); /* Task can safely be re-inserted now: */ node = node->next; task->wake_q.next = NULL; -- GitLab From e5e678e4fea26d73444f4427cbbaeab4fa79ecee Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Thu, 22 Apr 2021 13:02:36 -0400 Subject: [PATCH 0431/3804] sched,fair: Skip newidle_balance if a wakeup is pending The try_to_wake_up function has an optimization where it can queue a task for wakeup on its previous CPU, if the task is still in the middle of going to sleep inside schedule(). Once schedule() re-enables IRQs, the task will be woken up with an IPI, and placed back on the runqueue. If we have such a wakeup pending, there is no need to search other CPUs for runnable tasks. Just skip (or bail out early from) newidle balancing, and run the just woken up task. For a memcache like workload test, this reduces total CPU use by about 2%, proportionally split between user and system time, and p99 and p95 application response time by 10% on average. The schedstats run_delay number shows a similar improvement. Signed-off-by: Rik van Riel Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Vincent Guittot Acked-by: Mel Gorman Link: https://lkml.kernel.org/r/20210422130236.0bb353df@imladris.surriel.com --- kernel/sched/fair.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3248e24a90b0f..d10c6cc4609c0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -10592,6 +10592,14 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) u64 curr_cost = 0; update_misfit_status(NULL, this_rq); + + /* + * There is a task waiting to run. No need to search for one. + * Return 0; the task will be enqueued when switching to idle. + */ + if (this_rq->ttwu_pending) + return 0; + /* * We must set idle_stamp _before_ calling idle_balance(), such that we * measure the duration of idle_balance() as idle time. @@ -10657,7 +10665,8 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) * Stop searching for tasks to pull if there are * now runnable tasks on this rq. */ - if (pulled_task || this_rq->nr_running > 0) + if (pulled_task || this_rq->nr_running > 0 || + this_rq->ttwu_pending) break; } rcu_read_unlock(); -- GitLab From 8d4c97c105ca0735b0d972d1025cb150a7008451 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Tue, 4 May 2021 10:07:42 +0100 Subject: [PATCH 0432/3804] sched/fair: Only compute base_energy_pd if necessary find_energy_efficient_cpu() searches the best energy CPU to place a task on. To do so, the energy of each performance domain (pd) is computed w/ and w/o the task placed on it. The energy of a pd w/o the task (base_energy_pd) is computed prior knowing whether a CPU is available in the pd. Move the base_energy_pd computation after looping through the CPUs of a pd and only compute it if at least one CPU is available. Suggested-by: Xuewen Yan Signed-off-by: Pierre Gondois Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Lukasz Luba Reviewed-by: Dietmar Eggemann Reviewed-by: Vincent Donnefort Link: https://lkml.kernel.org/r/20210504090743.9688-2-Pierre.Gondois@arm.com --- kernel/sched/fair.c | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index d10c6cc4609c0..b229d0c238066 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6687,13 +6687,10 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) for (; pd; pd = pd->next) { unsigned long cur_delta, spare_cap, max_spare_cap = 0; + bool compute_prev_delta = false; unsigned long base_energy_pd; int max_spare_cap_cpu = -1; - /* Compute the 'base' energy of the pd, without @p */ - base_energy_pd = compute_energy(p, -1, pd); - base_energy += base_energy_pd; - for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) { if (!cpumask_test_cpu(cpu, p->cpus_ptr)) continue; @@ -6714,25 +6711,35 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) if (!fits_capacity(util, cpu_cap)) continue; - /* Always use prev_cpu as a candidate. */ if (cpu == prev_cpu) { - prev_delta = compute_energy(p, prev_cpu, pd); - prev_delta -= base_energy_pd; - best_delta = min(best_delta, prev_delta); - } - - /* - * Find the CPU with the maximum spare capacity in - * the performance domain - */ - if (spare_cap > max_spare_cap) { + /* Always use prev_cpu as a candidate. */ + compute_prev_delta = true; + } else if (spare_cap > max_spare_cap) { + /* + * Find the CPU with the maximum spare capacity + * in the performance domain. + */ max_spare_cap = spare_cap; max_spare_cap_cpu = cpu; } } - /* Evaluate the energy impact of using this CPU. */ - if (max_spare_cap_cpu >= 0 && max_spare_cap_cpu != prev_cpu) { + if (max_spare_cap_cpu < 0 && !compute_prev_delta) + continue; + + /* Compute the 'base' energy of the pd, without @p */ + base_energy_pd = compute_energy(p, -1, pd); + base_energy += base_energy_pd; + + /* Evaluate the energy impact of using prev_cpu. */ + if (compute_prev_delta) { + prev_delta = compute_energy(p, prev_cpu, pd); + prev_delta -= base_energy_pd; + best_delta = min(best_delta, prev_delta); + } + + /* Evaluate the energy impact of using max_spare_cap_cpu. */ + if (max_spare_cap_cpu >= 0) { cur_delta = compute_energy(p, max_spare_cap_cpu, pd); cur_delta -= base_energy_pd; if (cur_delta < best_delta) { -- GitLab From 619e090c8e409e09bd3e8edcd5a73d83f689890c Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Tue, 4 May 2021 10:07:43 +0100 Subject: [PATCH 0433/3804] sched/fair: Fix negative energy delta in find_energy_efficient_cpu() find_energy_efficient_cpu() (feec()) searches the best energy CPU to place a task on. To do so, compute_energy() estimates the energy impact of placing the task on a CPU, based on CPU and task utilization signals. Utilization signals can be concurrently updated while evaluating a performance domain (pd). In some cases, this leads to having a 'negative delta', i.e. placing the task in the pd is seen as an energy gain. Thus, any further energy comparison is biased. In case of a 'negative delta', return prev_cpu since: 1. a 'negative delta' happens in less than 0.5% of feec() calls, on a Juno with 6 CPUs (4 little, 2 big) 2. it is unlikely to have two consecutive 'negative delta' for a task, so if the first call fails, feec() will correctly place the task in the next feec() call 3. EAS current behavior tends to select prev_cpu if the task doesn't raise the OPP of its current pd. prev_cpu is EAS's generic decision 4. prev_cpu should be preferred to returning an error code. In the latter case, select_idle_sibling() would do the placement, selecting a big (and not energy efficient) CPU. As 3., the task would potentially reside on the big CPU for a long time Reported-by: Xuewen Yan Suggested-by: Xuewen Yan Signed-off-by: Pierre Gondois Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Lukasz Luba Reviewed-by: Dietmar Eggemann Reviewed-by: Vincent Donnefort Link: https://lkml.kernel.org/r/20210504090743.9688-3-Pierre.Gondois@arm.com --- kernel/sched/fair.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b229d0c238066..c209f68aad612 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6661,15 +6661,15 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) { unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX; struct root_domain *rd = cpu_rq(smp_processor_id())->rd; + int cpu, best_energy_cpu = prev_cpu, target = -1; unsigned long cpu_cap, util, base_energy = 0; - int cpu, best_energy_cpu = prev_cpu; struct sched_domain *sd; struct perf_domain *pd; rcu_read_lock(); pd = rcu_dereference(rd->pd); if (!pd || READ_ONCE(rd->overutilized)) - goto fail; + goto unlock; /* * Energy-aware wake-up happens on the lowest sched_domain starting @@ -6679,7 +6679,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd))) sd = sd->parent; if (!sd) - goto fail; + goto unlock; + + target = prev_cpu; sync_entity_load_avg(&p->se); if (!task_util_est(p)) @@ -6734,6 +6736,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) /* Evaluate the energy impact of using prev_cpu. */ if (compute_prev_delta) { prev_delta = compute_energy(p, prev_cpu, pd); + if (prev_delta < base_energy_pd) + goto unlock; prev_delta -= base_energy_pd; best_delta = min(best_delta, prev_delta); } @@ -6741,6 +6745,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) /* Evaluate the energy impact of using max_spare_cap_cpu. */ if (max_spare_cap_cpu >= 0) { cur_delta = compute_energy(p, max_spare_cap_cpu, pd); + if (cur_delta < base_energy_pd) + goto unlock; cur_delta -= base_energy_pd; if (cur_delta < best_delta) { best_delta = cur_delta; @@ -6748,25 +6754,22 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) } } } -unlock: rcu_read_unlock(); /* * Pick the best CPU if prev_cpu cannot be used, or if it saves at * least 6% of the energy used by prev_cpu. */ - if (prev_delta == ULONG_MAX) - return best_energy_cpu; + if ((prev_delta == ULONG_MAX) || + (prev_delta - best_delta) > ((prev_delta + base_energy) >> 4)) + target = best_energy_cpu; - if ((prev_delta - best_delta) > ((prev_delta + base_energy) >> 4)) - return best_energy_cpu; - - return prev_cpu; + return target; -fail: +unlock: rcu_read_unlock(); - return -1; + return target; } /* -- GitLab From 4b7a08a0b6e4e910a6feee438d76e426381df0cb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 May 2021 22:43:48 +0200 Subject: [PATCH 0434/3804] delayacct: Use sched_clock() Like all scheduler statistics, use sched_clock() based time. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Rik van Riel Reviewed-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Johannes Weiner Acked-by: Balbir Singh Link: https://lkml.kernel.org/r/20210505111525.001031466@infradead.org --- kernel/delayacct.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 27725754ac991..3fe7cd52b4593 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -7,9 +7,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -42,10 +42,9 @@ void __delayacct_tsk_init(struct task_struct *tsk) * Finish delay accounting for a statistic using its timestamps (@start), * accumalator (@total) and @count */ -static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, - u32 *count) +static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count) { - s64 ns = ktime_get_ns() - *start; + s64 ns = local_clock() - *start; unsigned long flags; if (ns > 0) { @@ -58,7 +57,7 @@ static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, void __delayacct_blkio_start(void) { - current->delays->blkio_start = ktime_get_ns(); + current->delays->blkio_start = local_clock(); } /* @@ -151,21 +150,20 @@ __u64 __delayacct_blkio_ticks(struct task_struct *tsk) void __delayacct_freepages_start(void) { - current->delays->freepages_start = ktime_get_ns(); + current->delays->freepages_start = local_clock(); } void __delayacct_freepages_end(void) { - delayacct_end( - ¤t->delays->lock, - ¤t->delays->freepages_start, - ¤t->delays->freepages_delay, - ¤t->delays->freepages_count); + delayacct_end(¤t->delays->lock, + ¤t->delays->freepages_start, + ¤t->delays->freepages_delay, + ¤t->delays->freepages_count); } void __delayacct_thrashing_start(void) { - current->delays->thrashing_start = ktime_get_ns(); + current->delays->thrashing_start = local_clock(); } void __delayacct_thrashing_end(void) -- GitLab From 4e29fb709885eda5f0d1fa3418e6ead01a64e46d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 May 2021 22:43:45 +0200 Subject: [PATCH 0435/3804] sched: Rename sched_info_{queued,dequeued} For consistency, rename {queued,dequeued} to {enqueue,dequeue}. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Rik van Riel Reviewed-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Johannes Weiner Acked-by: Balbir Singh Link: https://lkml.kernel.org/r/20210505111525.061402904@infradead.org --- kernel/sched/core.c | 4 ++-- kernel/sched/stats.h | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 61d1d85bb93da..660120d0a2ce1 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1595,7 +1595,7 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags) update_rq_clock(rq); if (!(flags & ENQUEUE_RESTORE)) { - sched_info_queued(rq, p); + sched_info_enqueue(rq, p); psi_enqueue(p, flags & ENQUEUE_WAKEUP); } @@ -1609,7 +1609,7 @@ static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) update_rq_clock(rq); if (!(flags & DEQUEUE_SAVE)) { - sched_info_dequeued(rq, p); + sched_info_dequeue(rq, p); psi_dequeue(p, flags & DEQUEUE_SLEEP); } diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index dc218e9f45585..ee7da12a70569 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -25,7 +25,7 @@ rq_sched_info_depart(struct rq *rq, unsigned long long delta) } static inline void -rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) +rq_sched_info_dequeue(struct rq *rq, unsigned long long delta) { if (rq) rq->rq_sched_info.run_delay += delta; @@ -42,7 +42,7 @@ rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) #else /* !CONFIG_SCHEDSTATS: */ static inline void rq_sched_info_arrive (struct rq *rq, unsigned long long delta) { } -static inline void rq_sched_info_dequeued(struct rq *rq, unsigned long long delta) { } +static inline void rq_sched_info_dequeue(struct rq *rq, unsigned long long delta) { } static inline void rq_sched_info_depart (struct rq *rq, unsigned long long delta) { } # define schedstat_enabled() 0 # define __schedstat_inc(var) do { } while (0) @@ -161,7 +161,7 @@ static inline void sched_info_reset_dequeued(struct task_struct *t) * from dequeue_task() to account for possible rq->clock skew across CPUs. The * delta taken on each CPU would annul the skew. */ -static inline void sched_info_dequeued(struct rq *rq, struct task_struct *t) +static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t) { unsigned long long now = rq_clock(rq), delta = 0; @@ -172,7 +172,7 @@ static inline void sched_info_dequeued(struct rq *rq, struct task_struct *t) sched_info_reset_dequeued(t); t->sched_info.run_delay += delta; - rq_sched_info_dequeued(rq, delta); + rq_sched_info_dequeue(rq, delta); } /* @@ -197,9 +197,9 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t) /* * This function is only called from enqueue_task(), but also only updates * the timestamp if it is already not set. It's assumed that - * sched_info_dequeued() will clear that stamp when appropriate. + * sched_info_dequeue() will clear that stamp when appropriate. */ -static inline void sched_info_queued(struct rq *rq, struct task_struct *t) +static inline void sched_info_enqueue(struct rq *rq, struct task_struct *t) { if (sched_info_on()) { if (!t->sched_info.last_queued) @@ -212,7 +212,7 @@ static inline void sched_info_queued(struct rq *rq, struct task_struct *t) * due, typically, to expiring its time slice (this may also be called when * switching to the idle task). Now we can calculate how long we ran. * Also, if the process is still in the TASK_RUNNING state, call - * sched_info_queued() to mark that it has now again started waiting on + * sched_info_enqueue() to mark that it has now again started waiting on * the runqueue. */ static inline void sched_info_depart(struct rq *rq, struct task_struct *t) @@ -222,7 +222,7 @@ static inline void sched_info_depart(struct rq *rq, struct task_struct *t) rq_sched_info_depart(rq, delta); if (t->state == TASK_RUNNING) - sched_info_queued(rq, t); + sched_info_enqueue(rq, t); } /* @@ -253,9 +253,9 @@ sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *n } #else /* !CONFIG_SCHED_INFO: */ -# define sched_info_queued(rq, t) do { } while (0) +# define sched_info_enqueue(rq, t) do { } while (0) # define sched_info_reset_dequeued(t) do { } while (0) -# define sched_info_dequeued(rq, t) do { } while (0) +# define sched_info_dequeue(rq, t) do { } while (0) # define sched_info_depart(rq, t) do { } while (0) # define sched_info_arrive(rq, next) do { } while (0) # define sched_info_switch(rq, t, next) do { } while (0) -- GitLab From c5895d3f06cbb80ccb311f1dcb37074651030cb6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 May 2021 22:43:42 +0200 Subject: [PATCH 0436/3804] sched: Simplify sched_info_on() The situation around sched_info is somewhat complicated, it is used by sched_stats and delayacct and, indirectly, kvm. If SCHEDSTATS=Y (but disabled by default) sched_info_on() is unconditionally true -- this is the case for all distro kernel configs I checked. If for some reason SCHEDSTATS=N, but TASK_DELAY_ACCT=Y, then sched_info_on() can return false when delayacct is disabled, presumably because there would be no other users left; except kvm is. Instead of complicating matters further by accurately accounting sched_stat and kvm state, simply unconditionally enable when SCHED_INFO=Y, matching the common distro case. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Johannes Weiner Link: https://lkml.kernel.org/r/20210505111525.121458839@infradead.org --- include/linux/sched/stat.h | 10 ++-------- kernel/delayacct.c | 1 - kernel/sched/stats.h | 37 ++++++++++--------------------------- 3 files changed, 12 insertions(+), 36 deletions(-) diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h index 568286411b43a..939c3ec9e1b90 100644 --- a/include/linux/sched/stat.h +++ b/include/linux/sched/stat.h @@ -3,6 +3,7 @@ #define _LINUX_SCHED_STAT_H #include +#include /* * Various counters maintained by the scheduler and fork(), @@ -23,14 +24,7 @@ extern unsigned long nr_iowait_cpu(int cpu); static inline int sched_info_on(void) { -#ifdef CONFIG_SCHEDSTATS - return 1; -#elif defined(CONFIG_TASK_DELAY_ACCT) - extern int delayacct_on; - return delayacct_on; -#else - return 0; -#endif + return IS_ENABLED(CONFIG_SCHED_INFO); } #ifdef CONFIG_SCHEDSTATS diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 3fe7cd52b4593..3a0b910386d15 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -15,7 +15,6 @@ #include int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */ -EXPORT_SYMBOL_GPL(delayacct_on); struct kmem_cache *delayacct_cache; static int __init delayacct_setup_disable(char *str) diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index ee7da12a70569..33ffd41935bab 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -150,11 +150,6 @@ static inline void psi_sched_switch(struct task_struct *prev, #endif /* CONFIG_PSI */ #ifdef CONFIG_SCHED_INFO -static inline void sched_info_reset_dequeued(struct task_struct *t) -{ - t->sched_info.last_queued = 0; -} - /* * We are interested in knowing how long it was from the *first* time a * task was queued to the time that it finally hit a CPU, we call this routine @@ -163,13 +158,12 @@ static inline void sched_info_reset_dequeued(struct task_struct *t) */ static inline void sched_info_dequeue(struct rq *rq, struct task_struct *t) { - unsigned long long now = rq_clock(rq), delta = 0; + unsigned long long delta = 0; - if (sched_info_on()) { - if (t->sched_info.last_queued) - delta = now - t->sched_info.last_queued; + if (t->sched_info.last_queued) { + delta = rq_clock(rq) - t->sched_info.last_queued; + t->sched_info.last_queued = 0; } - sched_info_reset_dequeued(t); t->sched_info.run_delay += delta; rq_sched_info_dequeue(rq, delta); @@ -184,9 +178,10 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t) { unsigned long long now = rq_clock(rq), delta = 0; - if (t->sched_info.last_queued) + if (t->sched_info.last_queued) { delta = now - t->sched_info.last_queued; - sched_info_reset_dequeued(t); + t->sched_info.last_queued = 0; + } t->sched_info.run_delay += delta; t->sched_info.last_arrival = now; t->sched_info.pcount++; @@ -201,10 +196,8 @@ static void sched_info_arrive(struct rq *rq, struct task_struct *t) */ static inline void sched_info_enqueue(struct rq *rq, struct task_struct *t) { - if (sched_info_on()) { - if (!t->sched_info.last_queued) - t->sched_info.last_queued = rq_clock(rq); - } + if (!t->sched_info.last_queued) + t->sched_info.last_queued = rq_clock(rq); } /* @@ -231,7 +224,7 @@ static inline void sched_info_depart(struct rq *rq, struct task_struct *t) * the idle task.) We are only called when prev != next. */ static inline void -__sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) +sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) { /* * prev now departs the CPU. It's not interesting to record @@ -245,18 +238,8 @@ __sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct sched_info_arrive(rq, next); } -static inline void -sched_info_switch(struct rq *rq, struct task_struct *prev, struct task_struct *next) -{ - if (sched_info_on()) - __sched_info_switch(rq, prev, next); -} - #else /* !CONFIG_SCHED_INFO: */ # define sched_info_enqueue(rq, t) do { } while (0) -# define sched_info_reset_dequeued(t) do { } while (0) # define sched_info_dequeue(rq, t) do { } while (0) -# define sched_info_depart(rq, t) do { } while (0) -# define sched_info_arrive(rq, next) do { } while (0) # define sched_info_switch(rq, t, next) do { } while (0) #endif /* CONFIG_SCHED_INFO */ -- GitLab From 63b3f96e1a989846a5a521d4fbef4bc86406929d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 May 2021 22:43:39 +0200 Subject: [PATCH 0437/3804] kvm: Select SCHED_INFO instead of TASK_DELAY_ACCT AFAICT KVM only relies on SCHED_INFO. Nothing uses the p->delays data that belongs to TASK_DELAY_ACCT. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Paolo Bonzini Acked-by: Marc Zyngier Acked-by: Balbir Singh Link: https://lkml.kernel.org/r/20210505111525.187225172@infradead.org --- arch/arm64/kvm/Kconfig | 5 +---- arch/x86/kvm/Kconfig | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 3964acf5451ea..a4eba0908bfac 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -20,8 +20,6 @@ if VIRTUALIZATION menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" depends on OF - # for TASKSTATS/TASK_DELAY_ACCT: - depends on NET && MULTIUSER select MMU_NOTIFIER select PREEMPT_NOTIFIERS select HAVE_KVM_CPU_RELAX_INTERCEPT @@ -38,8 +36,7 @@ menuconfig KVM select IRQ_BYPASS_MANAGER select HAVE_KVM_IRQ_BYPASS select HAVE_KVM_VCPU_RUN_PID_CHANGE - select TASKSTATS - select TASK_DELAY_ACCT + select SCHED_INFO help Support hosting virtualized guest machines. diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index f6b93a35ce145..fb8efb387aff5 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -22,8 +22,6 @@ config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM depends on HIGH_RES_TIMERS - # for TASKSTATS/TASK_DELAY_ACCT: - depends on NET && MULTIUSER depends on X86_LOCAL_APIC select PREEMPT_NOTIFIERS select MMU_NOTIFIER @@ -36,8 +34,7 @@ config KVM select KVM_ASYNC_PF select USER_RETURN_NOTIFIER select KVM_MMIO - select TASKSTATS - select TASK_DELAY_ACCT + select SCHED_INFO select PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT -- GitLab From eee4d9fee2544389e5ce5697ed92db67c86d7a9f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 May 2021 22:43:36 +0200 Subject: [PATCH 0438/3804] delayacct: Add static_branch in scheduler hooks Cheaper when delayacct is disabled. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Balbir Singh Acked-by: Johannes Weiner Link: https://lkml.kernel.org/r/20210505111525.248028369@infradead.org --- include/linux/delayacct.h | 8 ++++++++ kernel/delayacct.c | 3 +++ 2 files changed, 11 insertions(+) diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 21651f9467515..57fefa54b53a0 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -58,8 +58,10 @@ struct task_delay_info { #include #include +#include #ifdef CONFIG_TASK_DELAY_ACCT +DECLARE_STATIC_KEY_TRUE(delayacct_key); extern int delayacct_on; /* Delay accounting turned on/off */ extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); @@ -114,6 +116,9 @@ static inline void delayacct_tsk_free(struct task_struct *tsk) static inline void delayacct_blkio_start(void) { + if (!static_branch_likely(&delayacct_key)) + return; + delayacct_set_flag(current, DELAYACCT_PF_BLKIO); if (current->delays) __delayacct_blkio_start(); @@ -121,6 +126,9 @@ static inline void delayacct_blkio_start(void) static inline void delayacct_blkio_end(struct task_struct *p) { + if (!static_branch_likely(&delayacct_key)) + return; + if (p->delays) __delayacct_blkio_end(p); delayacct_clear_flag(p, DELAYACCT_PF_BLKIO); diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 3a0b910386d15..63012fd39ae23 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -14,6 +14,7 @@ #include #include +DEFINE_STATIC_KEY_TRUE(delayacct_key); int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */ struct kmem_cache *delayacct_cache; @@ -28,6 +29,8 @@ void delayacct_init(void) { delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT); delayacct_tsk_init(&init_task); + if (!delayacct_on) + static_branch_disable(&delayacct_key); } void __delayacct_tsk_init(struct task_struct *tsk) -- GitLab From e4042ad492357fa995921376462b04a025dd53b6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 4 May 2021 22:43:32 +0200 Subject: [PATCH 0439/3804] delayacct: Default disabled Assuming this stuff isn't actually used much; disable it by default and avoid allocating and tracking the task_delay_info structure. taskstats is changed to still report the regular sched and sched_info and only skip the missing task_delay_info fields instead of not reporting anything. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Reviewed-by: Ingo Molnar Link: https://lkml.kernel.org/r/20210505111525.308018373@infradead.org --- Documentation/accounting/delay-accounting.rst | 8 ++++---- .../admin-guide/kernel-parameters.txt | 2 +- include/linux/delayacct.h | 16 ++++------------ kernel/delayacct.c | 19 +++++++++++-------- 4 files changed, 20 insertions(+), 25 deletions(-) diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index 7cc7f5852da0f..f20b282d6de08 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -69,13 +69,13 @@ Compile the kernel with:: CONFIG_TASK_DELAY_ACCT=y CONFIG_TASKSTATS=y -Delay accounting is enabled by default at boot up. -To disable, add:: +Delay accounting is disabled by default at boot up. +To enable, add:: - nodelayacct + delayacct to the kernel boot options. The rest of the instructions -below assume this has not been done. +below assume this has been done. After the system has booted up, use a utility similar to getdelays.c to access the delays diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index cb89dbdedc463..ef5048c127a3c 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3244,7 +3244,7 @@ noclflush [BUGS=X86] Don't use the CLFLUSH instruction - nodelayacct [KNL] Disable per-task delay accounting + delayacct [KNL] Enable per-task delay accounting nodsp [SH] Disable hardware DSP at boot time. diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 57fefa54b53a0..225c8e01a1117 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -61,7 +61,7 @@ struct task_delay_info { #include #ifdef CONFIG_TASK_DELAY_ACCT -DECLARE_STATIC_KEY_TRUE(delayacct_key); +DECLARE_STATIC_KEY_FALSE(delayacct_key); extern int delayacct_on; /* Delay accounting turned on/off */ extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); @@ -69,7 +69,7 @@ extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); extern void __delayacct_blkio_end(struct task_struct *); -extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); +extern int delayacct_add_tsk(struct taskstats *, struct task_struct *); extern __u64 __delayacct_blkio_ticks(struct task_struct *); extern void __delayacct_freepages_start(void); extern void __delayacct_freepages_end(void); @@ -116,7 +116,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk) static inline void delayacct_blkio_start(void) { - if (!static_branch_likely(&delayacct_key)) + if (!static_branch_unlikely(&delayacct_key)) return; delayacct_set_flag(current, DELAYACCT_PF_BLKIO); @@ -126,7 +126,7 @@ static inline void delayacct_blkio_start(void) static inline void delayacct_blkio_end(struct task_struct *p) { - if (!static_branch_likely(&delayacct_key)) + if (!static_branch_unlikely(&delayacct_key)) return; if (p->delays) @@ -134,14 +134,6 @@ static inline void delayacct_blkio_end(struct task_struct *p) delayacct_clear_flag(p, DELAYACCT_PF_BLKIO); } -static inline int delayacct_add_tsk(struct taskstats *d, - struct task_struct *tsk) -{ - if (!delayacct_on || !tsk->delays) - return 0; - return __delayacct_add_tsk(d, tsk); -} - static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) { if (tsk->delays) diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 63012fd39ae23..3f086903b2954 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -14,23 +14,23 @@ #include #include -DEFINE_STATIC_KEY_TRUE(delayacct_key); -int delayacct_on __read_mostly = 1; /* Delay accounting turned on/off */ +DEFINE_STATIC_KEY_FALSE(delayacct_key); +int delayacct_on __read_mostly; /* Delay accounting turned on/off */ struct kmem_cache *delayacct_cache; -static int __init delayacct_setup_disable(char *str) +static int __init delayacct_setup_enable(char *str) { - delayacct_on = 0; + delayacct_on = 1; return 1; } -__setup("nodelayacct", delayacct_setup_disable); +__setup("delayacct", delayacct_setup_enable); void delayacct_init(void) { delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT); delayacct_tsk_init(&init_task); - if (!delayacct_on) - static_branch_disable(&delayacct_key); + if (delayacct_on) + static_branch_enable(&delayacct_key); } void __delayacct_tsk_init(struct task_struct *tsk) @@ -83,7 +83,7 @@ void __delayacct_blkio_end(struct task_struct *p) delayacct_end(&delays->lock, &delays->blkio_start, total, count); } -int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) +int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) { u64 utime, stime, stimescaled, utimescaled; unsigned long long t2, t3; @@ -118,6 +118,9 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) d->cpu_run_virtual_total = (tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp; + if (!tsk->delays) + return 0; + /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ raw_spin_lock_irqsave(&tsk->delays->lock, flags); -- GitLab From 0cd7c741f01de13dc1eecf22557593b3514639bb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 10 May 2021 14:01:00 +0200 Subject: [PATCH 0440/3804] delayacct: Add sysctl to enable at runtime Just like sched_schedstats, allow runtime enabling (and disabling) of delayacct. This is useful if one forgot to add the delayacct boot time option. Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/YJkhebGJAywaZowX@hirez.programming.kicks-ass.net --- Documentation/accounting/delay-accounting.rst | 6 ++-- include/linux/delayacct.h | 4 +++ kernel/delayacct.c | 36 +++++++++++++++++-- kernel/sysctl.c | 12 +++++++ 4 files changed, 54 insertions(+), 4 deletions(-) diff --git a/Documentation/accounting/delay-accounting.rst b/Documentation/accounting/delay-accounting.rst index f20b282d6de08..1b8b46deeb299 100644 --- a/Documentation/accounting/delay-accounting.rst +++ b/Documentation/accounting/delay-accounting.rst @@ -74,8 +74,10 @@ To enable, add:: delayacct -to the kernel boot options. The rest of the instructions -below assume this has been done. +to the kernel boot options. The rest of the instructions below assume this has +been done. Alternatively, use sysctl kernel.task_delayacct to switch the state +at runtime. Note however that only tasks started after enabling it will have +delayacct information. After the system has booted up, use a utility similar to getdelays.c to access the delays diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 225c8e01a1117..af7e6eb502837 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -65,6 +65,10 @@ DECLARE_STATIC_KEY_FALSE(delayacct_key); extern int delayacct_on; /* Delay accounting turned on/off */ extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); + +extern int sysctl_delayacct(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); + extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 3f086903b2954..51530d5b15a8a 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -18,6 +18,17 @@ DEFINE_STATIC_KEY_FALSE(delayacct_key); int delayacct_on __read_mostly; /* Delay accounting turned on/off */ struct kmem_cache *delayacct_cache; +static void set_delayacct(bool enabled) +{ + if (enabled) { + static_branch_enable(&delayacct_key); + delayacct_on = 1; + } else { + delayacct_on = 0; + static_branch_disable(&delayacct_key); + } +} + static int __init delayacct_setup_enable(char *str) { delayacct_on = 1; @@ -29,9 +40,30 @@ void delayacct_init(void) { delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT); delayacct_tsk_init(&init_task); - if (delayacct_on) - static_branch_enable(&delayacct_key); + set_delayacct(delayacct_on); +} + +#ifdef CONFIG_PROC_SYSCTL +int sysctl_delayacct(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) +{ + int state = delayacct_on; + struct ctl_table t; + int err; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + t = *table; + t.data = &state; + err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); + if (err < 0) + return err; + if (write) + set_delayacct(state); + return err; } +#endif void __delayacct_tsk_init(struct task_struct *tsk) { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 14edf84cc571f..0afbfc83157a4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -71,6 +71,7 @@ #include #include #include +#include #include "../lib/kstrtox.h" @@ -1727,6 +1728,17 @@ static struct ctl_table kern_table[] = { .extra2 = SYSCTL_ONE, }, #endif /* CONFIG_SCHEDSTATS */ +#ifdef CONFIG_TASK_DELAY_ACCT + { + .procname = "task_delayacct", + .data = NULL, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = sysctl_delayacct, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif /* CONFIG_TASK_DELAY_ACCT */ #ifdef CONFIG_NUMA_BALANCING { .procname = "numa_balancing", -- GitLab From 9099a14708ce1dfecb6002605594a0daa319b555 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Nov 2020 18:19:35 -0500 Subject: [PATCH 0441/3804] sched/fair: Add a few assertions Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.015639083@infradead.org --- kernel/sched/fair.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c209f68aad612..6bdbb7bb0d66d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6288,6 +6288,11 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) task_util = uclamp_task_util(p); } + /* + * per-cpu select_idle_mask usage + */ + lockdep_assert_irqs_disabled(); + if ((available_idle_cpu(target) || sched_idle_cpu(target)) && asym_fits_capacity(task_util, target)) return target; @@ -6781,8 +6786,6 @@ unlock: * certain conditions an idle sibling CPU if the domain has SD_WAKE_AFFINE set. * * Returns the target CPU number. - * - * preempt must be disabled. */ static int select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) @@ -6795,6 +6798,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) /* SD_flags and WF_flags share the first nibble */ int sd_flag = wake_flags & 0xF; + /* + * required for stable ->cpus_allowed + */ + lockdep_assert_held(&p->pi_lock); if (wake_flags & WF_TTWU) { record_wakee(p); -- GitLab From 39d371b7c0c299d489041884d005aacc4bba8c15 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Mar 2021 12:13:13 +0100 Subject: [PATCH 0442/3804] sched: Provide raw_spin_rq_*lock*() helpers In prepration for playing games with rq->lock, add some rq_lock wrappers. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.075967879@infradead.org --- kernel/sched/core.c | 15 +++++++++++++ kernel/sched/sched.h | 50 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 660120d0a2ce1..5568018222d9c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -184,6 +184,21 @@ int sysctl_sched_rt_runtime = 950000; * */ +void raw_spin_rq_lock_nested(struct rq *rq, int subclass) +{ + raw_spin_lock_nested(rq_lockp(rq), subclass); +} + +bool raw_spin_rq_trylock(struct rq *rq) +{ + return raw_spin_trylock(rq_lockp(rq)); +} + +void raw_spin_rq_unlock(struct rq *rq) +{ + raw_spin_unlock(rq_lockp(rq)); +} + /* * __task_rq_lock - lock the rq @p resides on. */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index a189bec137291..f654587106355 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1113,6 +1113,56 @@ static inline bool is_migration_disabled(struct task_struct *p) #endif } +static inline raw_spinlock_t *rq_lockp(struct rq *rq) +{ + return &rq->lock; +} + +static inline void lockdep_assert_rq_held(struct rq *rq) +{ + lockdep_assert_held(rq_lockp(rq)); +} + +extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass); +extern bool raw_spin_rq_trylock(struct rq *rq); +extern void raw_spin_rq_unlock(struct rq *rq); + +static inline void raw_spin_rq_lock(struct rq *rq) +{ + raw_spin_rq_lock_nested(rq, 0); +} + +static inline void raw_spin_rq_lock_irq(struct rq *rq) +{ + local_irq_disable(); + raw_spin_rq_lock(rq); +} + +static inline void raw_spin_rq_unlock_irq(struct rq *rq) +{ + raw_spin_rq_unlock(rq); + local_irq_enable(); +} + +static inline unsigned long _raw_spin_rq_lock_irqsave(struct rq *rq) +{ + unsigned long flags; + local_irq_save(flags); + raw_spin_rq_lock(rq); + return flags; +} + +static inline void raw_spin_rq_unlock_irqrestore(struct rq *rq, unsigned long flags) +{ + raw_spin_rq_unlock(rq); + local_irq_restore(flags); +} + +#define raw_spin_rq_lock_irqsave(rq, flags) \ +do { \ + flags = _raw_spin_rq_lock_irqsave(rq); \ +} while (0) + #ifdef CONFIG_SCHED_SMT extern void __update_idle_core(struct rq *rq); -- GitLab From 5cb9eaa3d274f75539077a28cf01e3563195fa53 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Nov 2020 18:19:31 -0500 Subject: [PATCH 0443/3804] sched: Wrap rq::lock access In preparation of playing games with rq->lock, abstract the thing using an accessor. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.136465446@infradead.org --- kernel/sched/core.c | 70 +++++++++++++-------------- kernel/sched/cpuacct.c | 12 ++--- kernel/sched/deadline.c | 22 ++++----- kernel/sched/debug.c | 4 +- kernel/sched/fair.c | 35 +++++++------- kernel/sched/idle.c | 4 +- kernel/sched/pelt.h | 2 +- kernel/sched/rt.c | 16 +++--- kernel/sched/sched.h | 105 ++++++++++++++++++++-------------------- kernel/sched/topology.c | 4 +- 10 files changed, 136 insertions(+), 138 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5568018222d9c..5e6f5f5750a32 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -211,12 +211,12 @@ struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) for (;;) { rq = task_rq(p); - raw_spin_lock(&rq->lock); + raw_spin_rq_lock(rq); if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { rq_pin_lock(rq, rf); return rq; } - raw_spin_unlock(&rq->lock); + raw_spin_rq_unlock(rq); while (unlikely(task_on_rq_migrating(p))) cpu_relax(); @@ -235,7 +235,7 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) for (;;) { raw_spin_lock_irqsave(&p->pi_lock, rf->flags); rq = task_rq(p); - raw_spin_lock(&rq->lock); + raw_spin_rq_lock(rq); /* * move_queued_task() task_rq_lock() * @@ -257,7 +257,7 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) rq_pin_lock(rq, rf); return rq; } - raw_spin_unlock(&rq->lock); + raw_spin_rq_unlock(rq); raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); while (unlikely(task_on_rq_migrating(p))) @@ -327,7 +327,7 @@ void update_rq_clock(struct rq *rq) { s64 delta; - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); if (rq->clock_update_flags & RQCF_ACT_SKIP) return; @@ -625,7 +625,7 @@ void resched_curr(struct rq *rq) struct task_struct *curr = rq->curr; int cpu; - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); if (test_tsk_need_resched(curr)) return; @@ -649,10 +649,10 @@ void resched_cpu(int cpu) struct rq *rq = cpu_rq(cpu); unsigned long flags; - raw_spin_lock_irqsave(&rq->lock, flags); + raw_spin_rq_lock_irqsave(rq, flags); if (cpu_online(cpu) || cpu == smp_processor_id()) resched_curr(rq); - raw_spin_unlock_irqrestore(&rq->lock, flags); + raw_spin_rq_unlock_irqrestore(rq, flags); } #ifdef CONFIG_SMP @@ -1151,7 +1151,7 @@ static inline void uclamp_rq_inc_id(struct rq *rq, struct task_struct *p, struct uclamp_se *uc_se = &p->uclamp[clamp_id]; struct uclamp_bucket *bucket; - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); /* Update task effective clamp */ p->uclamp[clamp_id] = uclamp_eff_get(p, clamp_id); @@ -1191,7 +1191,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p, unsigned int bkt_clamp; unsigned int rq_clamp; - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); /* * If sched_uclamp_used was enabled after task @p was enqueued, @@ -1864,7 +1864,7 @@ static inline bool is_cpu_allowed(struct task_struct *p, int cpu) static struct rq *move_queued_task(struct rq *rq, struct rq_flags *rf, struct task_struct *p, int new_cpu) { - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); deactivate_task(rq, p, DEQUEUE_NOCLOCK); set_task_cpu(p, new_cpu); @@ -2038,7 +2038,7 @@ int push_cpu_stop(void *arg) struct task_struct *p = arg; raw_spin_lock_irq(&p->pi_lock); - raw_spin_lock(&rq->lock); + raw_spin_rq_lock(rq); if (task_rq(p) != rq) goto out_unlock; @@ -2068,7 +2068,7 @@ int push_cpu_stop(void *arg) out_unlock: rq->push_busy = false; - raw_spin_unlock(&rq->lock); + raw_spin_rq_unlock(rq); raw_spin_unlock_irq(&p->pi_lock); put_task_struct(p); @@ -2121,7 +2121,7 @@ __do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask, u32 * Because __kthread_bind() calls this on blocked tasks without * holding rq->lock. */ - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); dequeue_task(rq, p, DEQUEUE_SAVE | DEQUEUE_NOCLOCK); } if (running) @@ -2462,7 +2462,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) * task_rq_lock(). */ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || - lockdep_is_held(&task_rq(p)->lock))); + lockdep_is_held(rq_lockp(task_rq(p))))); #endif /* * Clearly, migrating tasks to offline CPUs is a fairly daft thing. @@ -3004,7 +3004,7 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, { int en_flags = ENQUEUE_WAKEUP | ENQUEUE_NOCLOCK; - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); if (p->sched_contributes_to_load) rq->nr_uninterruptible--; @@ -4015,7 +4015,7 @@ static void do_balance_callbacks(struct rq *rq, struct callback_head *head) void (*func)(struct rq *rq); struct callback_head *next; - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); while (head) { func = (void (*)(struct rq *))head->func; @@ -4038,7 +4038,7 @@ static inline struct callback_head *splice_balance_callbacks(struct rq *rq) { struct callback_head *head = rq->balance_callback; - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); if (head) rq->balance_callback = NULL; @@ -4055,9 +4055,9 @@ static inline void balance_callbacks(struct rq *rq, struct callback_head *head) unsigned long flags; if (unlikely(head)) { - raw_spin_lock_irqsave(&rq->lock, flags); + raw_spin_rq_lock_irqsave(rq, flags); do_balance_callbacks(rq, head); - raw_spin_unlock_irqrestore(&rq->lock, flags); + raw_spin_rq_unlock_irqrestore(rq, flags); } } @@ -4088,10 +4088,10 @@ prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf * do an early lockdep release here: */ rq_unpin_lock(rq, rf); - spin_release(&rq->lock.dep_map, _THIS_IP_); + spin_release(&rq_lockp(rq)->dep_map, _THIS_IP_); #ifdef CONFIG_DEBUG_SPINLOCK /* this is a valid case when another task releases the spinlock */ - rq->lock.owner = next; + rq_lockp(rq)->owner = next; #endif } @@ -4102,9 +4102,9 @@ static inline void finish_lock_switch(struct rq *rq) * fix up the runqueue lock - which gets 'carried over' from * prev into current: */ - spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); + spin_acquire(&rq_lockp(rq)->dep_map, 0, 0, _THIS_IP_); __balance_callbacks(rq); - raw_spin_unlock_irq(&rq->lock); + raw_spin_rq_unlock_irq(rq); } /* @@ -5164,7 +5164,7 @@ static void __sched notrace __schedule(bool preempt) rq_unpin_lock(rq, &rf); __balance_callbacks(rq); - raw_spin_unlock_irq(&rq->lock); + raw_spin_rq_unlock_irq(rq); } } @@ -5706,7 +5706,7 @@ out_unlock: rq_unpin_lock(rq, &rf); __balance_callbacks(rq); - raw_spin_unlock(&rq->lock); + raw_spin_rq_unlock(rq); preempt_enable(); } @@ -7456,7 +7456,7 @@ void init_idle(struct task_struct *idle, int cpu) __sched_fork(0, idle); raw_spin_lock_irqsave(&idle->pi_lock, flags); - raw_spin_lock(&rq->lock); + raw_spin_rq_lock(rq); idle->state = TASK_RUNNING; idle->se.exec_start = sched_clock(); @@ -7494,7 +7494,7 @@ void init_idle(struct task_struct *idle, int cpu) #ifdef CONFIG_SMP idle->on_cpu = 1; #endif - raw_spin_unlock(&rq->lock); + raw_spin_rq_unlock(rq); raw_spin_unlock_irqrestore(&idle->pi_lock, flags); /* Set the preempt count _outside_ the spinlocks! */ @@ -7660,7 +7660,7 @@ static void balance_push(struct rq *rq) { struct task_struct *push_task = rq->curr; - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); SCHED_WARN_ON(rq->cpu != smp_processor_id()); /* @@ -7698,9 +7698,9 @@ static void balance_push(struct rq *rq) */ if (!rq->nr_running && !rq_has_pinned_tasks(rq) && rcuwait_active(&rq->hotplug_wait)) { - raw_spin_unlock(&rq->lock); + raw_spin_rq_unlock(rq); rcuwait_wake_up(&rq->hotplug_wait); - raw_spin_lock(&rq->lock); + raw_spin_rq_lock(rq); } return; } @@ -7710,7 +7710,7 @@ static void balance_push(struct rq *rq) * Temporarily drop rq->lock such that we can wake-up the stop task. * Both preemption and IRQs are still disabled. */ - raw_spin_unlock(&rq->lock); + raw_spin_rq_unlock(rq); stop_one_cpu_nowait(rq->cpu, __balance_push_cpu_stop, push_task, this_cpu_ptr(&push_work)); /* @@ -7718,7 +7718,7 @@ static void balance_push(struct rq *rq) * schedule(). The next pick is obviously going to be the stop task * which kthread_is_per_cpu() and will push this task away. */ - raw_spin_lock(&rq->lock); + raw_spin_rq_lock(rq); } static void balance_push_set(int cpu, bool on) @@ -8008,7 +8008,7 @@ static void dump_rq_tasks(struct rq *rq, const char *loglvl) struct task_struct *g, *p; int cpu = cpu_of(rq); - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); printk("%sCPU%d enqueued tasks (%u total):\n", loglvl, cpu, rq->nr_running); for_each_process_thread(g, p) { @@ -8181,7 +8181,7 @@ void __init sched_init(void) struct rq *rq; rq = cpu_rq(i); - raw_spin_lock_init(&rq->lock); + raw_spin_lock_init(&rq->__lock); rq->nr_running = 0; rq->calc_load_active = 0; rq->calc_load_update = jiffies + LOAD_FREQ; diff --git a/kernel/sched/cpuacct.c b/kernel/sched/cpuacct.c index 104a1bade14f8..893eece65bfda 100644 --- a/kernel/sched/cpuacct.c +++ b/kernel/sched/cpuacct.c @@ -112,7 +112,7 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, /* * Take rq->lock to make 64-bit read safe on 32-bit platforms. */ - raw_spin_lock_irq(&cpu_rq(cpu)->lock); + raw_spin_rq_lock_irq(cpu_rq(cpu)); #endif if (index == CPUACCT_STAT_NSTATS) { @@ -126,7 +126,7 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, } #ifndef CONFIG_64BIT - raw_spin_unlock_irq(&cpu_rq(cpu)->lock); + raw_spin_rq_unlock_irq(cpu_rq(cpu)); #endif return data; @@ -141,14 +141,14 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) /* * Take rq->lock to make 64-bit write safe on 32-bit platforms. */ - raw_spin_lock_irq(&cpu_rq(cpu)->lock); + raw_spin_rq_lock_irq(cpu_rq(cpu)); #endif for (i = 0; i < CPUACCT_STAT_NSTATS; i++) cpuusage->usages[i] = val; #ifndef CONFIG_64BIT - raw_spin_unlock_irq(&cpu_rq(cpu)->lock); + raw_spin_rq_unlock_irq(cpu_rq(cpu)); #endif } @@ -253,13 +253,13 @@ static int cpuacct_all_seq_show(struct seq_file *m, void *V) * Take rq->lock to make 64-bit read safe on 32-bit * platforms. */ - raw_spin_lock_irq(&cpu_rq(cpu)->lock); + raw_spin_rq_lock_irq(cpu_rq(cpu)); #endif seq_printf(m, " %llu", cpuusage->usages[index]); #ifndef CONFIG_64BIT - raw_spin_unlock_irq(&cpu_rq(cpu)->lock); + raw_spin_rq_unlock_irq(cpu_rq(cpu)); #endif } seq_puts(m, "\n"); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 9a2989749b8d1..6e99b8b37c8c8 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -157,7 +157,7 @@ void __add_running_bw(u64 dl_bw, struct dl_rq *dl_rq) { u64 old = dl_rq->running_bw; - lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock); + lockdep_assert_rq_held(rq_of_dl_rq(dl_rq)); dl_rq->running_bw += dl_bw; SCHED_WARN_ON(dl_rq->running_bw < old); /* overflow */ SCHED_WARN_ON(dl_rq->running_bw > dl_rq->this_bw); @@ -170,7 +170,7 @@ void __sub_running_bw(u64 dl_bw, struct dl_rq *dl_rq) { u64 old = dl_rq->running_bw; - lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock); + lockdep_assert_rq_held(rq_of_dl_rq(dl_rq)); dl_rq->running_bw -= dl_bw; SCHED_WARN_ON(dl_rq->running_bw > old); /* underflow */ if (dl_rq->running_bw > old) @@ -184,7 +184,7 @@ void __add_rq_bw(u64 dl_bw, struct dl_rq *dl_rq) { u64 old = dl_rq->this_bw; - lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock); + lockdep_assert_rq_held(rq_of_dl_rq(dl_rq)); dl_rq->this_bw += dl_bw; SCHED_WARN_ON(dl_rq->this_bw < old); /* overflow */ } @@ -194,7 +194,7 @@ void __sub_rq_bw(u64 dl_bw, struct dl_rq *dl_rq) { u64 old = dl_rq->this_bw; - lockdep_assert_held(&(rq_of_dl_rq(dl_rq))->lock); + lockdep_assert_rq_held(rq_of_dl_rq(dl_rq)); dl_rq->this_bw -= dl_bw; SCHED_WARN_ON(dl_rq->this_bw > old); /* underflow */ if (dl_rq->this_bw > old) @@ -987,7 +987,7 @@ static int start_dl_timer(struct task_struct *p) ktime_t now, act; s64 delta; - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); /* * We want the timer to fire at the deadline, but considering @@ -1097,9 +1097,9 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) * If the runqueue is no longer available, migrate the * task elsewhere. This necessarily changes rq. */ - lockdep_unpin_lock(&rq->lock, rf.cookie); + lockdep_unpin_lock(rq_lockp(rq), rf.cookie); rq = dl_task_offline_migration(rq, p); - rf.cookie = lockdep_pin_lock(&rq->lock); + rf.cookie = lockdep_pin_lock(rq_lockp(rq)); update_rq_clock(rq); /* @@ -1731,7 +1731,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused * from try_to_wake_up(). Hence, p->pi_lock is locked, but * rq->lock is not... So, lock it */ - raw_spin_lock(&rq->lock); + raw_spin_rq_lock(rq); if (p->dl.dl_non_contending) { sub_running_bw(&p->dl, &rq->dl); p->dl.dl_non_contending = 0; @@ -1746,7 +1746,7 @@ static void migrate_task_rq_dl(struct task_struct *p, int new_cpu __maybe_unused put_task_struct(p); } sub_rq_bw(&p->dl, &rq->dl); - raw_spin_unlock(&rq->lock); + raw_spin_rq_unlock(rq); } static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p) @@ -2291,10 +2291,10 @@ skip: double_unlock_balance(this_rq, src_rq); if (push_task) { - raw_spin_unlock(&this_rq->lock); + raw_spin_rq_unlock(this_rq); stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop, push_task, &src_rq->push_work); - raw_spin_lock(&this_rq->lock); + raw_spin_rq_lock(this_rq); } } diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 9c882f20803e0..3bdee5fd7d292 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -576,7 +576,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", SPLIT_NS(cfs_rq->exec_clock)); - raw_spin_lock_irqsave(&rq->lock, flags); + raw_spin_rq_lock_irqsave(rq, flags); if (rb_first_cached(&cfs_rq->tasks_timeline)) MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime; last = __pick_last_entity(cfs_rq); @@ -584,7 +584,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) max_vruntime = last->vruntime; min_vruntime = cfs_rq->min_vruntime; rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime; - raw_spin_unlock_irqrestore(&rq->lock, flags); + raw_spin_rq_unlock_irqrestore(rq, flags); SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", SPLIT_NS(MIN_vruntime)); SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "min_vruntime", diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6bdbb7bb0d66d..e50bd75067d58 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1107,7 +1107,7 @@ struct numa_group { static struct numa_group *deref_task_numa_group(struct task_struct *p) { return rcu_dereference_check(p->numa_group, p == current || - (lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu))); + (lockdep_is_held(rq_lockp(task_rq(p))) && !READ_ONCE(p->on_cpu))); } static struct numa_group *deref_curr_numa_group(struct task_struct *p) @@ -5328,7 +5328,7 @@ static void __maybe_unused update_runtime_enabled(struct rq *rq) { struct task_group *tg; - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); rcu_read_lock(); list_for_each_entry_rcu(tg, &task_groups, list) { @@ -5347,7 +5347,7 @@ static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq) { struct task_group *tg; - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); rcu_read_lock(); list_for_each_entry_rcu(tg, &task_groups, list) { @@ -6891,7 +6891,7 @@ static void migrate_task_rq_fair(struct task_struct *p, int new_cpu) * In case of TASK_ON_RQ_MIGRATING we in fact hold the 'old' * rq->lock and can modify state directly. */ - lockdep_assert_held(&task_rq(p)->lock); + lockdep_assert_rq_held(task_rq(p)); detach_entity_cfs_rq(&p->se); } else { @@ -7518,7 +7518,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env) { s64 delta; - lockdep_assert_held(&env->src_rq->lock); + lockdep_assert_rq_held(env->src_rq); if (p->sched_class != &fair_sched_class) return 0; @@ -7616,7 +7616,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) { int tsk_cache_hot; - lockdep_assert_held(&env->src_rq->lock); + lockdep_assert_rq_held(env->src_rq); /* * We do not migrate tasks that are: @@ -7705,7 +7705,7 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) */ static void detach_task(struct task_struct *p, struct lb_env *env) { - lockdep_assert_held(&env->src_rq->lock); + lockdep_assert_rq_held(env->src_rq); deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK); set_task_cpu(p, env->dst_cpu); @@ -7721,7 +7721,7 @@ static struct task_struct *detach_one_task(struct lb_env *env) { struct task_struct *p; - lockdep_assert_held(&env->src_rq->lock); + lockdep_assert_rq_held(env->src_rq); list_for_each_entry_reverse(p, &env->src_rq->cfs_tasks, se.group_node) { @@ -7757,7 +7757,7 @@ static int detach_tasks(struct lb_env *env) struct task_struct *p; int detached = 0; - lockdep_assert_held(&env->src_rq->lock); + lockdep_assert_rq_held(env->src_rq); /* * Source run queue has been emptied by another CPU, clear @@ -7887,7 +7887,7 @@ next: */ static void attach_task(struct rq *rq, struct task_struct *p) { - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); BUG_ON(task_rq(p) != rq); activate_task(rq, p, ENQUEUE_NOCLOCK); @@ -9798,7 +9798,7 @@ more_balance: if (need_active_balance(&env)) { unsigned long flags; - raw_spin_lock_irqsave(&busiest->lock, flags); + raw_spin_rq_lock_irqsave(busiest, flags); /* * Don't kick the active_load_balance_cpu_stop, @@ -9806,8 +9806,7 @@ more_balance: * moved to this_cpu: */ if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) { - raw_spin_unlock_irqrestore(&busiest->lock, - flags); + raw_spin_rq_unlock_irqrestore(busiest, flags); goto out_one_pinned; } @@ -9824,7 +9823,7 @@ more_balance: busiest->push_cpu = this_cpu; active_balance = 1; } - raw_spin_unlock_irqrestore(&busiest->lock, flags); + raw_spin_rq_unlock_irqrestore(busiest, flags); if (active_balance) { stop_one_cpu_nowait(cpu_of(busiest), @@ -10649,7 +10648,7 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) goto out; } - raw_spin_unlock(&this_rq->lock); + raw_spin_rq_unlock(this_rq); update_blocked_averages(this_cpu); rcu_read_lock(); @@ -10688,7 +10687,7 @@ static int newidle_balance(struct rq *this_rq, struct rq_flags *rf) } rcu_read_unlock(); - raw_spin_lock(&this_rq->lock); + raw_spin_rq_lock(this_rq); if (curr_cost > this_rq->max_idle_balance_cost) this_rq->max_idle_balance_cost = curr_cost; @@ -11175,9 +11174,9 @@ void unregister_fair_sched_group(struct task_group *tg) rq = cpu_rq(cpu); - raw_spin_lock_irqsave(&rq->lock, flags); + raw_spin_rq_lock_irqsave(rq, flags); list_del_leaf_cfs_rq(tg->cfs_rq[cpu]); - raw_spin_unlock_irqrestore(&rq->lock, flags); + raw_spin_rq_unlock_irqrestore(rq, flags); } } diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 7ca3d3d86c2a5..0194768ea9e7b 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -455,10 +455,10 @@ struct task_struct *pick_next_task_idle(struct rq *rq) static void dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags) { - raw_spin_unlock_irq(&rq->lock); + raw_spin_rq_unlock_irq(rq); printk(KERN_ERR "bad: scheduling from the idle thread!\n"); dump_stack(); - raw_spin_lock_irq(&rq->lock); + raw_spin_rq_lock_irq(rq); } /* diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h index 1462846d244e3..9ed6d8c414ad6 100644 --- a/kernel/sched/pelt.h +++ b/kernel/sched/pelt.h @@ -141,7 +141,7 @@ static inline void update_idle_rq_clock_pelt(struct rq *rq) static inline u64 rq_clock_pelt(struct rq *rq) { - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); assert_clock_updated(rq); return rq->clock_pelt - rq->lost_idle_time; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index c286e5ba3c942..b3d39c3d3ab34 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -888,7 +888,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) if (skip) continue; - raw_spin_lock(&rq->lock); + raw_spin_rq_lock(rq); update_rq_clock(rq); if (rt_rq->rt_time) { @@ -926,7 +926,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) if (enqueue) sched_rt_rq_enqueue(rt_rq); - raw_spin_unlock(&rq->lock); + raw_spin_rq_unlock(rq); } if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)) @@ -1894,10 +1894,10 @@ retry: */ push_task = get_push_task(rq); if (push_task) { - raw_spin_unlock(&rq->lock); + raw_spin_rq_unlock(rq); stop_one_cpu_nowait(rq->cpu, push_cpu_stop, push_task, &rq->push_work); - raw_spin_lock(&rq->lock); + raw_spin_rq_lock(rq); } return 0; @@ -2122,10 +2122,10 @@ void rto_push_irq_work_func(struct irq_work *work) * When it gets updated, a check is made if a push is possible. */ if (has_pushable_tasks(rq)) { - raw_spin_lock(&rq->lock); + raw_spin_rq_lock(rq); while (push_rt_task(rq, true)) ; - raw_spin_unlock(&rq->lock); + raw_spin_rq_unlock(rq); } raw_spin_lock(&rd->rto_lock); @@ -2243,10 +2243,10 @@ skip: double_unlock_balance(this_rq, src_rq); if (push_task) { - raw_spin_unlock(&this_rq->lock); + raw_spin_rq_unlock(this_rq); stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop, push_task, &src_rq->push_work); - raw_spin_lock(&this_rq->lock); + raw_spin_rq_lock(this_rq); } } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index f654587106355..dbabf282c039a 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -905,7 +905,7 @@ DECLARE_STATIC_KEY_FALSE(sched_uclamp_used); */ struct rq { /* runqueue lock: */ - raw_spinlock_t lock; + raw_spinlock_t __lock; /* * nr_running and cpu_load should be in the same cacheline because @@ -1115,7 +1115,7 @@ static inline bool is_migration_disabled(struct task_struct *p) static inline raw_spinlock_t *rq_lockp(struct rq *rq) { - return &rq->lock; + return &rq->__lock; } static inline void lockdep_assert_rq_held(struct rq *rq) @@ -1229,7 +1229,7 @@ static inline void assert_clock_updated(struct rq *rq) static inline u64 rq_clock(struct rq *rq) { - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); assert_clock_updated(rq); return rq->clock; @@ -1237,7 +1237,7 @@ static inline u64 rq_clock(struct rq *rq) static inline u64 rq_clock_task(struct rq *rq) { - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); assert_clock_updated(rq); return rq->clock_task; @@ -1263,7 +1263,7 @@ static inline u64 rq_clock_thermal(struct rq *rq) static inline void rq_clock_skip_update(struct rq *rq) { - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); rq->clock_update_flags |= RQCF_REQ_SKIP; } @@ -1273,7 +1273,7 @@ static inline void rq_clock_skip_update(struct rq *rq) */ static inline void rq_clock_cancel_skipupdate(struct rq *rq) { - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); rq->clock_update_flags &= ~RQCF_REQ_SKIP; } @@ -1304,7 +1304,7 @@ extern struct callback_head balance_push_callback; */ static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf) { - rf->cookie = lockdep_pin_lock(&rq->lock); + rf->cookie = lockdep_pin_lock(rq_lockp(rq)); #ifdef CONFIG_SCHED_DEBUG rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); @@ -1322,12 +1322,12 @@ static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf) rf->clock_update_flags = RQCF_UPDATED; #endif - lockdep_unpin_lock(&rq->lock, rf->cookie); + lockdep_unpin_lock(rq_lockp(rq), rf->cookie); } static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf) { - lockdep_repin_lock(&rq->lock, rf->cookie); + lockdep_repin_lock(rq_lockp(rq), rf->cookie); #ifdef CONFIG_SCHED_DEBUG /* @@ -1348,7 +1348,7 @@ static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf) __releases(rq->lock) { rq_unpin_lock(rq, rf); - raw_spin_unlock(&rq->lock); + raw_spin_rq_unlock(rq); } static inline void @@ -1357,7 +1357,7 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) __releases(p->pi_lock) { rq_unpin_lock(rq, rf); - raw_spin_unlock(&rq->lock); + raw_spin_rq_unlock(rq); raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); } @@ -1365,7 +1365,7 @@ static inline void rq_lock_irqsave(struct rq *rq, struct rq_flags *rf) __acquires(rq->lock) { - raw_spin_lock_irqsave(&rq->lock, rf->flags); + raw_spin_rq_lock_irqsave(rq, rf->flags); rq_pin_lock(rq, rf); } @@ -1373,7 +1373,7 @@ static inline void rq_lock_irq(struct rq *rq, struct rq_flags *rf) __acquires(rq->lock) { - raw_spin_lock_irq(&rq->lock); + raw_spin_rq_lock_irq(rq); rq_pin_lock(rq, rf); } @@ -1381,7 +1381,7 @@ static inline void rq_lock(struct rq *rq, struct rq_flags *rf) __acquires(rq->lock) { - raw_spin_lock(&rq->lock); + raw_spin_rq_lock(rq); rq_pin_lock(rq, rf); } @@ -1389,7 +1389,7 @@ static inline void rq_relock(struct rq *rq, struct rq_flags *rf) __acquires(rq->lock) { - raw_spin_lock(&rq->lock); + raw_spin_rq_lock(rq); rq_repin_lock(rq, rf); } @@ -1398,7 +1398,7 @@ rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf) __releases(rq->lock) { rq_unpin_lock(rq, rf); - raw_spin_unlock_irqrestore(&rq->lock, rf->flags); + raw_spin_rq_unlock_irqrestore(rq, rf->flags); } static inline void @@ -1406,7 +1406,7 @@ rq_unlock_irq(struct rq *rq, struct rq_flags *rf) __releases(rq->lock) { rq_unpin_lock(rq, rf); - raw_spin_unlock_irq(&rq->lock); + raw_spin_rq_unlock_irq(rq); } static inline void @@ -1414,7 +1414,7 @@ rq_unlock(struct rq *rq, struct rq_flags *rf) __releases(rq->lock) { rq_unpin_lock(rq, rf); - raw_spin_unlock(&rq->lock); + raw_spin_rq_unlock(rq); } static inline struct rq * @@ -1479,7 +1479,7 @@ queue_balance_callback(struct rq *rq, struct callback_head *head, void (*func)(struct rq *rq)) { - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); if (unlikely(head->next || rq->balance_callback == &balance_push_callback)) return; @@ -2019,7 +2019,7 @@ static inline struct task_struct *get_push_task(struct rq *rq) { struct task_struct *p = rq->curr; - lockdep_assert_held(&rq->lock); + lockdep_assert_rq_held(rq); if (rq->push_busy) return NULL; @@ -2249,7 +2249,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) __acquires(busiest->lock) __acquires(this_rq->lock) { - raw_spin_unlock(&this_rq->lock); + raw_spin_rq_unlock(this_rq); double_rq_lock(this_rq, busiest); return 1; @@ -2268,20 +2268,22 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) __acquires(busiest->lock) __acquires(this_rq->lock) { - int ret = 0; - - if (unlikely(!raw_spin_trylock(&busiest->lock))) { - if (busiest < this_rq) { - raw_spin_unlock(&this_rq->lock); - raw_spin_lock(&busiest->lock); - raw_spin_lock_nested(&this_rq->lock, - SINGLE_DEPTH_NESTING); - ret = 1; - } else - raw_spin_lock_nested(&busiest->lock, - SINGLE_DEPTH_NESTING); + if (rq_lockp(this_rq) == rq_lockp(busiest)) + return 0; + + if (likely(raw_spin_rq_trylock(busiest))) + return 0; + + if (rq_lockp(busiest) >= rq_lockp(this_rq)) { + raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING); + return 0; } - return ret; + + raw_spin_rq_unlock(this_rq); + raw_spin_rq_lock(busiest); + raw_spin_rq_lock_nested(this_rq, SINGLE_DEPTH_NESTING); + + return 1; } #endif /* CONFIG_PREEMPTION */ @@ -2291,11 +2293,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) */ static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest) { - if (unlikely(!irqs_disabled())) { - /* printk() doesn't work well under rq->lock */ - raw_spin_unlock(&this_rq->lock); - BUG_ON(1); - } + lockdep_assert_irqs_disabled(); return _double_lock_balance(this_rq, busiest); } @@ -2303,8 +2301,9 @@ static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest) static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) __releases(busiest->lock) { - raw_spin_unlock(&busiest->lock); - lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); + if (rq_lockp(this_rq) != rq_lockp(busiest)) + raw_spin_rq_unlock(busiest); + lock_set_subclass(&rq_lockp(this_rq)->dep_map, 0, _RET_IP_); } static inline void double_lock(spinlock_t *l1, spinlock_t *l2) @@ -2345,16 +2344,16 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) __acquires(rq2->lock) { BUG_ON(!irqs_disabled()); - if (rq1 == rq2) { - raw_spin_lock(&rq1->lock); + if (rq_lockp(rq1) == rq_lockp(rq2)) { + raw_spin_rq_lock(rq1); __acquire(rq2->lock); /* Fake it out ;) */ } else { - if (rq1 < rq2) { - raw_spin_lock(&rq1->lock); - raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); + if (rq_lockp(rq1) < rq_lockp(rq2)) { + raw_spin_rq_lock(rq1); + raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING); } else { - raw_spin_lock(&rq2->lock); - raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); + raw_spin_rq_lock(rq2); + raw_spin_rq_lock_nested(rq1, SINGLE_DEPTH_NESTING); } } } @@ -2369,9 +2368,9 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) __releases(rq1->lock) __releases(rq2->lock) { - raw_spin_unlock(&rq1->lock); - if (rq1 != rq2) - raw_spin_unlock(&rq2->lock); + raw_spin_rq_unlock(rq1); + if (rq_lockp(rq1) != rq_lockp(rq2)) + raw_spin_rq_unlock(rq2); else __release(rq2->lock); } @@ -2394,7 +2393,7 @@ static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) { BUG_ON(!irqs_disabled()); BUG_ON(rq1 != rq2); - raw_spin_lock(&rq1->lock); + raw_spin_rq_lock(rq1); __acquire(rq2->lock); /* Fake it out ;) */ } @@ -2409,7 +2408,7 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) __releases(rq2->lock) { BUG_ON(rq1 != rq2); - raw_spin_unlock(&rq1->lock); + raw_spin_rq_unlock(rq1); __release(rq2->lock); } diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 55a0a243e8718..053115b55f89f 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -467,7 +467,7 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd) struct root_domain *old_rd = NULL; unsigned long flags; - raw_spin_lock_irqsave(&rq->lock, flags); + raw_spin_rq_lock_irqsave(rq, flags); if (rq->rd) { old_rd = rq->rd; @@ -493,7 +493,7 @@ void rq_attach_root(struct rq *rq, struct root_domain *rd) if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) set_rq_online(rq); - raw_spin_unlock_irqrestore(&rq->lock, flags); + raw_spin_rq_unlock_irqrestore(rq, flags); if (old_rd) call_rcu(&old_rd->rcu, free_rootdomain); -- GitLab From d66f1b06b5b438cd20ba3664b8eef1f9c79e84bf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Mar 2021 12:16:48 +0100 Subject: [PATCH 0444/3804] sched: Prepare for Core-wide rq->lock When switching on core-sched, CPUs need to agree which lock to use for their RQ. The new rule will be that rq->core_enabled will be toggled while holding all rq->__locks that belong to a core. This means we need to double check the rq->core_enabled value after each lock acquire and retry if it changed. This also has implications for those sites that take multiple RQ locks, they need to be careful that the second lock doesn't end up being the first lock. Verify the lock pointer after acquiring the first lock, because if they're on the same core, holding any of the rq->__lock instances will pin the core state. While there, change the rq->__lock order to CPU number, instead of rq address, this greatly simplifies the next patch. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/YJUNY0dmrJMD/BIm@hirez.programming.kicks-ass.net --- kernel/sched/core.c | 48 ++++++++++++++++++++++++++++++++++++++++++-- kernel/sched/sched.h | 48 ++++++++++++++++---------------------------- 2 files changed, 63 insertions(+), 33 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5e6f5f5750a32..8bd2f12810e30 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -186,12 +186,37 @@ int sysctl_sched_rt_runtime = 950000; void raw_spin_rq_lock_nested(struct rq *rq, int subclass) { - raw_spin_lock_nested(rq_lockp(rq), subclass); + raw_spinlock_t *lock; + + if (sched_core_disabled()) { + raw_spin_lock_nested(&rq->__lock, subclass); + return; + } + + for (;;) { + lock = rq_lockp(rq); + raw_spin_lock_nested(lock, subclass); + if (likely(lock == rq_lockp(rq))) + return; + raw_spin_unlock(lock); + } } bool raw_spin_rq_trylock(struct rq *rq) { - return raw_spin_trylock(rq_lockp(rq)); + raw_spinlock_t *lock; + bool ret; + + if (sched_core_disabled()) + return raw_spin_trylock(&rq->__lock); + + for (;;) { + lock = rq_lockp(rq); + ret = raw_spin_trylock(lock); + if (!ret || (likely(lock == rq_lockp(rq)))) + return ret; + raw_spin_unlock(lock); + } } void raw_spin_rq_unlock(struct rq *rq) @@ -199,6 +224,25 @@ void raw_spin_rq_unlock(struct rq *rq) raw_spin_unlock(rq_lockp(rq)); } +#ifdef CONFIG_SMP +/* + * double_rq_lock - safely lock two runqueues + */ +void double_rq_lock(struct rq *rq1, struct rq *rq2) +{ + lockdep_assert_irqs_disabled(); + + if (rq_order_less(rq2, rq1)) + swap(rq1, rq2); + + raw_spin_rq_lock(rq1); + if (rq_lockp(rq1) == rq_lockp(rq2)) + return; + + raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING); +} +#endif + /* * __task_rq_lock - lock the rq @p resides on. */ diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index dbabf282c039a..f8bd5c8fc90aa 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1113,6 +1113,11 @@ static inline bool is_migration_disabled(struct task_struct *p) #endif } +static inline bool sched_core_disabled(void) +{ + return true; +} + static inline raw_spinlock_t *rq_lockp(struct rq *rq) { return &rq->__lock; @@ -2231,10 +2236,17 @@ unsigned long arch_scale_freq_capacity(int cpu) } #endif + #ifdef CONFIG_SMP -#ifdef CONFIG_PREEMPTION -static inline void double_rq_lock(struct rq *rq1, struct rq *rq2); +static inline bool rq_order_less(struct rq *rq1, struct rq *rq2) +{ + return rq1->cpu < rq2->cpu; +} + +extern void double_rq_lock(struct rq *rq1, struct rq *rq2); + +#ifdef CONFIG_PREEMPTION /* * fair double_lock_balance: Safely acquires both rq->locks in a fair @@ -2274,14 +2286,13 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) if (likely(raw_spin_rq_trylock(busiest))) return 0; - if (rq_lockp(busiest) >= rq_lockp(this_rq)) { + if (rq_order_less(this_rq, busiest)) { raw_spin_rq_lock_nested(busiest, SINGLE_DEPTH_NESTING); return 0; } raw_spin_rq_unlock(this_rq); - raw_spin_rq_lock(busiest); - raw_spin_rq_lock_nested(this_rq, SINGLE_DEPTH_NESTING); + double_rq_lock(this_rq, busiest); return 1; } @@ -2333,31 +2344,6 @@ static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2) raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING); } -/* - * double_rq_lock - safely lock two runqueues - * - * Note this does not disable interrupts like task_rq_lock, - * you need to do so manually before calling. - */ -static inline void double_rq_lock(struct rq *rq1, struct rq *rq2) - __acquires(rq1->lock) - __acquires(rq2->lock) -{ - BUG_ON(!irqs_disabled()); - if (rq_lockp(rq1) == rq_lockp(rq2)) { - raw_spin_rq_lock(rq1); - __acquire(rq2->lock); /* Fake it out ;) */ - } else { - if (rq_lockp(rq1) < rq_lockp(rq2)) { - raw_spin_rq_lock(rq1); - raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING); - } else { - raw_spin_rq_lock(rq2); - raw_spin_rq_lock_nested(rq1, SINGLE_DEPTH_NESTING); - } - } -} - /* * double_rq_unlock - safely unlock two runqueues * @@ -2368,11 +2354,11 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) __releases(rq1->lock) __releases(rq2->lock) { - raw_spin_rq_unlock(rq1); if (rq_lockp(rq1) != rq_lockp(rq2)) raw_spin_rq_unlock(rq2); else __release(rq2->lock); + raw_spin_rq_unlock(rq1); } extern void set_rq_online (struct rq *rq); -- GitLab From 9edeaea1bc452372718837ed2ba775811baf1ba1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Nov 2020 18:19:34 -0500 Subject: [PATCH 0445/3804] sched: Core-wide rq->lock Introduce the basic infrastructure to have a core wide rq->lock. This relies on the rq->__lock order being in increasing CPU number (inside a core). It is also constrained to SMT8 per lockdep (and SMT256 per preempt_count). Luckily SMT8 is the max supported SMT count for Linux (Mips, Sparc and Power are known to have this). Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/YJUNfzSgptjX7tG6@hirez.programming.kicks-ass.net --- kernel/Kconfig.preempt | 6 ++ kernel/sched/core.c | 164 ++++++++++++++++++++++++++++++++++++++++- kernel/sched/sched.h | 58 +++++++++++++++ 3 files changed, 224 insertions(+), 4 deletions(-) diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 4160173016605..ea1e3331c0ba3 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -99,3 +99,9 @@ config PREEMPT_DYNAMIC Interesting if you want the same pre-built kernel should be used for both Server and Desktop workloads. + +config SCHED_CORE + bool "Core Scheduling for SMT" + default y + depends on SCHED_SMT + diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8bd2f12810e30..384b79363a393 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -84,6 +84,108 @@ unsigned int sysctl_sched_rt_period = 1000000; __read_mostly int scheduler_running; +#ifdef CONFIG_SCHED_CORE + +DEFINE_STATIC_KEY_FALSE(__sched_core_enabled); + +/* + * Magic required such that: + * + * raw_spin_rq_lock(rq); + * ... + * raw_spin_rq_unlock(rq); + * + * ends up locking and unlocking the _same_ lock, and all CPUs + * always agree on what rq has what lock. + * + * XXX entirely possible to selectively enable cores, don't bother for now. + */ + +static DEFINE_MUTEX(sched_core_mutex); +static int sched_core_count; +static struct cpumask sched_core_mask; + +static void __sched_core_flip(bool enabled) +{ + int cpu, t, i; + + cpus_read_lock(); + + /* + * Toggle the online cores, one by one. + */ + cpumask_copy(&sched_core_mask, cpu_online_mask); + for_each_cpu(cpu, &sched_core_mask) { + const struct cpumask *smt_mask = cpu_smt_mask(cpu); + + i = 0; + local_irq_disable(); + for_each_cpu(t, smt_mask) { + /* supports up to SMT8 */ + raw_spin_lock_nested(&cpu_rq(t)->__lock, i++); + } + + for_each_cpu(t, smt_mask) + cpu_rq(t)->core_enabled = enabled; + + for_each_cpu(t, smt_mask) + raw_spin_unlock(&cpu_rq(t)->__lock); + local_irq_enable(); + + cpumask_andnot(&sched_core_mask, &sched_core_mask, smt_mask); + } + + /* + * Toggle the offline CPUs. + */ + cpumask_copy(&sched_core_mask, cpu_possible_mask); + cpumask_andnot(&sched_core_mask, &sched_core_mask, cpu_online_mask); + + for_each_cpu(cpu, &sched_core_mask) + cpu_rq(cpu)->core_enabled = enabled; + + cpus_read_unlock(); +} + +static void __sched_core_enable(void) +{ + // XXX verify there are no cookie tasks (yet) + + static_branch_enable(&__sched_core_enabled); + /* + * Ensure all previous instances of raw_spin_rq_*lock() have finished + * and future ones will observe !sched_core_disabled(). + */ + synchronize_rcu(); + __sched_core_flip(true); +} + +static void __sched_core_disable(void) +{ + // XXX verify there are no cookie tasks (left) + + __sched_core_flip(false); + static_branch_disable(&__sched_core_enabled); +} + +void sched_core_get(void) +{ + mutex_lock(&sched_core_mutex); + if (!sched_core_count++) + __sched_core_enable(); + mutex_unlock(&sched_core_mutex); +} + +void sched_core_put(void) +{ + mutex_lock(&sched_core_mutex); + if (!--sched_core_count) + __sched_core_disable(); + mutex_unlock(&sched_core_mutex); +} + +#endif /* CONFIG_SCHED_CORE */ + /* * part of the period that we allow rt tasks to run in us. * default: 0.95s @@ -188,16 +290,23 @@ void raw_spin_rq_lock_nested(struct rq *rq, int subclass) { raw_spinlock_t *lock; + /* Matches synchronize_rcu() in __sched_core_enable() */ + preempt_disable(); if (sched_core_disabled()) { raw_spin_lock_nested(&rq->__lock, subclass); + /* preempt_count *MUST* be > 1 */ + preempt_enable_no_resched(); return; } for (;;) { lock = rq_lockp(rq); raw_spin_lock_nested(lock, subclass); - if (likely(lock == rq_lockp(rq))) + if (likely(lock == rq_lockp(rq))) { + /* preempt_count *MUST* be > 1 */ + preempt_enable_no_resched(); return; + } raw_spin_unlock(lock); } } @@ -207,14 +316,21 @@ bool raw_spin_rq_trylock(struct rq *rq) raw_spinlock_t *lock; bool ret; - if (sched_core_disabled()) - return raw_spin_trylock(&rq->__lock); + /* Matches synchronize_rcu() in __sched_core_enable() */ + preempt_disable(); + if (sched_core_disabled()) { + ret = raw_spin_trylock(&rq->__lock); + preempt_enable(); + return ret; + } for (;;) { lock = rq_lockp(rq); ret = raw_spin_trylock(lock); - if (!ret || (likely(lock == rq_lockp(rq)))) + if (!ret || (likely(lock == rq_lockp(rq)))) { + preempt_enable(); return ret; + } raw_spin_unlock(lock); } } @@ -5041,6 +5157,40 @@ restart: BUG(); } +#ifdef CONFIG_SCHED_CORE + +static inline void sched_core_cpu_starting(unsigned int cpu) +{ + const struct cpumask *smt_mask = cpu_smt_mask(cpu); + struct rq *rq, *core_rq = NULL; + int i; + + core_rq = cpu_rq(cpu)->core; + + if (!core_rq) { + for_each_cpu(i, smt_mask) { + rq = cpu_rq(i); + if (rq->core && rq->core == rq) + core_rq = rq; + } + + if (!core_rq) + core_rq = cpu_rq(cpu); + + for_each_cpu(i, smt_mask) { + rq = cpu_rq(i); + + WARN_ON_ONCE(rq->core && rq->core != core_rq); + rq->core = core_rq; + } + } +} +#else /* !CONFIG_SCHED_CORE */ + +static inline void sched_core_cpu_starting(unsigned int cpu) {} + +#endif /* CONFIG_SCHED_CORE */ + /* * __schedule() is the main scheduler function. * @@ -8006,6 +8156,7 @@ static void sched_rq_cpu_starting(unsigned int cpu) int sched_cpu_starting(unsigned int cpu) { + sched_core_cpu_starting(cpu); sched_rq_cpu_starting(cpu); sched_tick_start(cpu); return 0; @@ -8290,6 +8441,11 @@ void __init sched_init(void) #endif /* CONFIG_SMP */ hrtick_rq_init(rq); atomic_set(&rq->nr_iowait, 0); + +#ifdef CONFIG_SCHED_CORE + rq->core = NULL; + rq->core_enabled = 0; +#endif } set_load_weight(&init_task, false); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index f8bd5c8fc90aa..29418b8c05dd2 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1075,6 +1075,12 @@ struct rq { #endif unsigned int push_busy; struct cpu_stop_work push_work; + +#ifdef CONFIG_SCHED_CORE + /* per rq */ + struct rq *core; + unsigned int core_enabled; +#endif }; #ifdef CONFIG_FAIR_GROUP_SCHED @@ -1113,6 +1119,35 @@ static inline bool is_migration_disabled(struct task_struct *p) #endif } +#ifdef CONFIG_SCHED_CORE + +DECLARE_STATIC_KEY_FALSE(__sched_core_enabled); + +static inline bool sched_core_enabled(struct rq *rq) +{ + return static_branch_unlikely(&__sched_core_enabled) && rq->core_enabled; +} + +static inline bool sched_core_disabled(void) +{ + return !static_branch_unlikely(&__sched_core_enabled); +} + +static inline raw_spinlock_t *rq_lockp(struct rq *rq) +{ + if (sched_core_enabled(rq)) + return &rq->core->__lock; + + return &rq->__lock; +} + +#else /* !CONFIG_SCHED_CORE */ + +static inline bool sched_core_enabled(struct rq *rq) +{ + return false; +} + static inline bool sched_core_disabled(void) { return true; @@ -1123,6 +1158,8 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq) return &rq->__lock; } +#endif /* CONFIG_SCHED_CORE */ + static inline void lockdep_assert_rq_held(struct rq *rq) { lockdep_assert_held(rq_lockp(rq)); @@ -2241,6 +2278,27 @@ unsigned long arch_scale_freq_capacity(int cpu) static inline bool rq_order_less(struct rq *rq1, struct rq *rq2) { +#ifdef CONFIG_SCHED_CORE + /* + * In order to not have {0,2},{1,3} turn into into an AB-BA, + * order by core-id first and cpu-id second. + * + * Notably: + * + * double_rq_lock(0,3); will take core-0, core-1 lock + * double_rq_lock(1,2); will take core-1, core-0 lock + * + * when only cpu-id is considered. + */ + if (rq1->core->cpu < rq2->core->cpu) + return true; + if (rq1->core->cpu > rq2->core->cpu) + return false; + + /* + * __sched_core_flip() relies on SMT having cpu-id lock order. + */ +#endif return rq1->cpu < rq2->cpu; } -- GitLab From 9ef7e7e33bcdb57be1afb28884053c28b5f05240 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Mar 2021 16:45:41 +0100 Subject: [PATCH 0446/3804] sched: Optimize rq_lockp() usage rq_lockp() includes a static_branch(), which is asm-goto, which is asm volatile which defeats regular CSE. This means that: if (!static_branch(&foo)) return simple; if (static_branch(&foo) && cond) return complex; Doesn't fold and we get horrible code. Introduce __rq_lockp() without the static_branch() on. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.316696988@infradead.org --- kernel/sched/core.c | 16 ++++++++-------- kernel/sched/deadline.c | 4 ++-- kernel/sched/fair.c | 2 +- kernel/sched/sched.h | 33 +++++++++++++++++++++++++-------- 4 files changed, 36 insertions(+), 19 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 384b79363a393..42c1c88741c02 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -300,9 +300,9 @@ void raw_spin_rq_lock_nested(struct rq *rq, int subclass) } for (;;) { - lock = rq_lockp(rq); + lock = __rq_lockp(rq); raw_spin_lock_nested(lock, subclass); - if (likely(lock == rq_lockp(rq))) { + if (likely(lock == __rq_lockp(rq))) { /* preempt_count *MUST* be > 1 */ preempt_enable_no_resched(); return; @@ -325,9 +325,9 @@ bool raw_spin_rq_trylock(struct rq *rq) } for (;;) { - lock = rq_lockp(rq); + lock = __rq_lockp(rq); ret = raw_spin_trylock(lock); - if (!ret || (likely(lock == rq_lockp(rq)))) { + if (!ret || (likely(lock == __rq_lockp(rq)))) { preempt_enable(); return ret; } @@ -352,7 +352,7 @@ void double_rq_lock(struct rq *rq1, struct rq *rq2) swap(rq1, rq2); raw_spin_rq_lock(rq1); - if (rq_lockp(rq1) == rq_lockp(rq2)) + if (__rq_lockp(rq1) == __rq_lockp(rq2)) return; raw_spin_rq_lock_nested(rq2, SINGLE_DEPTH_NESTING); @@ -2622,7 +2622,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) * task_rq_lock(). */ WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || - lockdep_is_held(rq_lockp(task_rq(p))))); + lockdep_is_held(__rq_lockp(task_rq(p))))); #endif /* * Clearly, migrating tasks to offline CPUs is a fairly daft thing. @@ -4248,7 +4248,7 @@ prepare_lock_switch(struct rq *rq, struct task_struct *next, struct rq_flags *rf * do an early lockdep release here: */ rq_unpin_lock(rq, rf); - spin_release(&rq_lockp(rq)->dep_map, _THIS_IP_); + spin_release(&__rq_lockp(rq)->dep_map, _THIS_IP_); #ifdef CONFIG_DEBUG_SPINLOCK /* this is a valid case when another task releases the spinlock */ rq_lockp(rq)->owner = next; @@ -4262,7 +4262,7 @@ static inline void finish_lock_switch(struct rq *rq) * fix up the runqueue lock - which gets 'carried over' from * prev into current: */ - spin_acquire(&rq_lockp(rq)->dep_map, 0, 0, _THIS_IP_); + spin_acquire(&__rq_lockp(rq)->dep_map, 0, 0, _THIS_IP_); __balance_callbacks(rq); raw_spin_rq_unlock_irq(rq); } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 6e99b8b37c8c8..fb0eb9a19fc7c 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1097,9 +1097,9 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) * If the runqueue is no longer available, migrate the * task elsewhere. This necessarily changes rq. */ - lockdep_unpin_lock(rq_lockp(rq), rf.cookie); + lockdep_unpin_lock(__rq_lockp(rq), rf.cookie); rq = dl_task_offline_migration(rq, p); - rf.cookie = lockdep_pin_lock(rq_lockp(rq)); + rf.cookie = lockdep_pin_lock(__rq_lockp(rq)); update_rq_clock(rq); /* diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e50bd75067d58..18960d00708a4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1107,7 +1107,7 @@ struct numa_group { static struct numa_group *deref_task_numa_group(struct task_struct *p) { return rcu_dereference_check(p->numa_group, p == current || - (lockdep_is_held(rq_lockp(task_rq(p))) && !READ_ONCE(p->on_cpu))); + (lockdep_is_held(__rq_lockp(task_rq(p))) && !READ_ONCE(p->on_cpu))); } static struct numa_group *deref_curr_numa_group(struct task_struct *p) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 29418b8c05dd2..ca30af37b9060 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1133,6 +1133,10 @@ static inline bool sched_core_disabled(void) return !static_branch_unlikely(&__sched_core_enabled); } +/* + * Be careful with this function; not for general use. The return value isn't + * stable unless you actually hold a relevant rq->__lock. + */ static inline raw_spinlock_t *rq_lockp(struct rq *rq) { if (sched_core_enabled(rq)) @@ -1141,6 +1145,14 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq) return &rq->__lock; } +static inline raw_spinlock_t *__rq_lockp(struct rq *rq) +{ + if (rq->core_enabled) + return &rq->core->__lock; + + return &rq->__lock; +} + #else /* !CONFIG_SCHED_CORE */ static inline bool sched_core_enabled(struct rq *rq) @@ -1158,11 +1170,16 @@ static inline raw_spinlock_t *rq_lockp(struct rq *rq) return &rq->__lock; } +static inline raw_spinlock_t *__rq_lockp(struct rq *rq) +{ + return &rq->__lock; +} + #endif /* CONFIG_SCHED_CORE */ static inline void lockdep_assert_rq_held(struct rq *rq) { - lockdep_assert_held(rq_lockp(rq)); + lockdep_assert_held(__rq_lockp(rq)); } extern void raw_spin_rq_lock_nested(struct rq *rq, int subclass); @@ -1346,7 +1363,7 @@ extern struct callback_head balance_push_callback; */ static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf) { - rf->cookie = lockdep_pin_lock(rq_lockp(rq)); + rf->cookie = lockdep_pin_lock(__rq_lockp(rq)); #ifdef CONFIG_SCHED_DEBUG rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); @@ -1364,12 +1381,12 @@ static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf) rf->clock_update_flags = RQCF_UPDATED; #endif - lockdep_unpin_lock(rq_lockp(rq), rf->cookie); + lockdep_unpin_lock(__rq_lockp(rq), rf->cookie); } static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf) { - lockdep_repin_lock(rq_lockp(rq), rf->cookie); + lockdep_repin_lock(__rq_lockp(rq), rf->cookie); #ifdef CONFIG_SCHED_DEBUG /* @@ -2338,7 +2355,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) __acquires(busiest->lock) __acquires(this_rq->lock) { - if (rq_lockp(this_rq) == rq_lockp(busiest)) + if (__rq_lockp(this_rq) == __rq_lockp(busiest)) return 0; if (likely(raw_spin_rq_trylock(busiest))) @@ -2370,9 +2387,9 @@ static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest) static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) __releases(busiest->lock) { - if (rq_lockp(this_rq) != rq_lockp(busiest)) + if (__rq_lockp(this_rq) != __rq_lockp(busiest)) raw_spin_rq_unlock(busiest); - lock_set_subclass(&rq_lockp(this_rq)->dep_map, 0, _RET_IP_); + lock_set_subclass(&__rq_lockp(this_rq)->dep_map, 0, _RET_IP_); } static inline void double_lock(spinlock_t *l1, spinlock_t *l2) @@ -2412,7 +2429,7 @@ static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2) __releases(rq1->lock) __releases(rq2->lock) { - if (rq_lockp(rq1) != rq_lockp(rq2)) + if (__rq_lockp(rq1) != __rq_lockp(rq2)) raw_spin_rq_unlock(rq2); else __release(rq2->lock); -- GitLab From 875feb41fd20f6bd6054c9e79a5bcd9da6d8d2b2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 29 Mar 2021 10:08:58 +0200 Subject: [PATCH 0447/3804] sched: Allow sched_core_put() from atomic context Stuff the meat of sched_core_put() into a work such that we can use sched_core_put() from atomic context. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.377455632@infradead.org --- kernel/sched/core.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 42c1c88741c02..85147bea9d93d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -102,7 +102,7 @@ DEFINE_STATIC_KEY_FALSE(__sched_core_enabled); */ static DEFINE_MUTEX(sched_core_mutex); -static int sched_core_count; +static atomic_t sched_core_count; static struct cpumask sched_core_mask; static void __sched_core_flip(bool enabled) @@ -170,18 +170,39 @@ static void __sched_core_disable(void) void sched_core_get(void) { + if (atomic_inc_not_zero(&sched_core_count)) + return; + mutex_lock(&sched_core_mutex); - if (!sched_core_count++) + if (!atomic_read(&sched_core_count)) __sched_core_enable(); + + smp_mb__before_atomic(); + atomic_inc(&sched_core_count); mutex_unlock(&sched_core_mutex); } -void sched_core_put(void) +static void __sched_core_put(struct work_struct *work) { - mutex_lock(&sched_core_mutex); - if (!--sched_core_count) + if (atomic_dec_and_mutex_lock(&sched_core_count, &sched_core_mutex)) { __sched_core_disable(); - mutex_unlock(&sched_core_mutex); + mutex_unlock(&sched_core_mutex); + } +} + +void sched_core_put(void) +{ + static DECLARE_WORK(_work, __sched_core_put); + + /* + * "There can be only one" + * + * Either this is the last one, or we don't actually need to do any + * 'work'. If it is the last *again*, we rely on + * WORK_STRUCT_PENDING_BIT. + */ + if (!atomic_add_unless(&sched_core_count, -1, 1)) + schedule_work(&_work); } #endif /* CONFIG_SCHED_CORE */ -- GitLab From 21f56ffe4482e501b9e83737612493eeaac21f5a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Nov 2020 18:19:32 -0500 Subject: [PATCH 0448/3804] sched: Introduce sched_class::pick_task() Because sched_class::pick_next_task() also implies sched_class::set_next_task() (and possibly put_prev_task() and newidle_balance) it is not state invariant. This makes it unsuitable for remote task selection. Signed-off-by: Peter Zijlstra (Intel) [Vineeth: folded fixes] Signed-off-by: Vineeth Remanan Pillai Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.437092775@infradead.org --- kernel/sched/deadline.c | 16 ++++++++++++++-- kernel/sched/fair.c | 40 +++++++++++++++++++++++++++++++++++++--- kernel/sched/idle.c | 8 ++++++++ kernel/sched/rt.c | 15 +++++++++++++-- kernel/sched/sched.h | 3 +++ kernel/sched/stop_task.c | 14 ++++++++++++-- 6 files changed, 87 insertions(+), 9 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index fb0eb9a19fc7c..3829c5a1b9366 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1852,7 +1852,7 @@ static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq, return rb_entry(left, struct sched_dl_entity, rb_node); } -static struct task_struct *pick_next_task_dl(struct rq *rq) +static struct task_struct *pick_task_dl(struct rq *rq) { struct sched_dl_entity *dl_se; struct dl_rq *dl_rq = &rq->dl; @@ -1864,7 +1864,18 @@ static struct task_struct *pick_next_task_dl(struct rq *rq) dl_se = pick_next_dl_entity(rq, dl_rq); BUG_ON(!dl_se); p = dl_task_of(dl_se); - set_next_task_dl(rq, p, true); + + return p; +} + +static struct task_struct *pick_next_task_dl(struct rq *rq) +{ + struct task_struct *p; + + p = pick_task_dl(rq); + if (p) + set_next_task_dl(rq, p, true); + return p; } @@ -2539,6 +2550,7 @@ DEFINE_SCHED_CLASS(dl) = { #ifdef CONFIG_SMP .balance = balance_dl, + .pick_task = pick_task_dl, .select_task_rq = select_task_rq_dl, .migrate_task_rq = migrate_task_rq_dl, .set_cpus_allowed = set_cpus_allowed_dl, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 18960d00708a4..51d72ab5b5ae3 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4419,6 +4419,8 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) static void set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { + clear_buddies(cfs_rq, se); + /* 'current' is not kept within the tree. */ if (se->on_rq) { /* @@ -4478,7 +4480,7 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) * Avoid running the skip buddy, if running something else can * be done without getting too unfair. */ - if (cfs_rq->skip == se) { + if (cfs_rq->skip && cfs_rq->skip == se) { struct sched_entity *second; if (se == curr) { @@ -4505,8 +4507,6 @@ pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr) se = cfs_rq->last; } - clear_buddies(cfs_rq, se); - return se; } @@ -7095,6 +7095,39 @@ preempt: set_last_buddy(se); } +#ifdef CONFIG_SMP +static struct task_struct *pick_task_fair(struct rq *rq) +{ + struct sched_entity *se; + struct cfs_rq *cfs_rq; + +again: + cfs_rq = &rq->cfs; + if (!cfs_rq->nr_running) + return NULL; + + do { + struct sched_entity *curr = cfs_rq->curr; + + /* When we pick for a remote RQ, we'll not have done put_prev_entity() */ + if (curr) { + if (curr->on_rq) + update_curr(cfs_rq); + else + curr = NULL; + + if (unlikely(check_cfs_rq_runtime(cfs_rq))) + goto again; + } + + se = pick_next_entity(cfs_rq, curr); + cfs_rq = group_cfs_rq(se); + } while (cfs_rq); + + return task_of(se); +} +#endif + struct task_struct * pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) { @@ -11298,6 +11331,7 @@ DEFINE_SCHED_CLASS(fair) = { #ifdef CONFIG_SMP .balance = balance_fair, + .pick_task = pick_task_fair, .select_task_rq = select_task_rq_fair, .migrate_task_rq = migrate_task_rq_fair, diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 0194768ea9e7b..43646e7876d91 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -439,6 +439,13 @@ static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool fir schedstat_inc(rq->sched_goidle); } +#ifdef CONFIG_SMP +static struct task_struct *pick_task_idle(struct rq *rq) +{ + return rq->idle; +} +#endif + struct task_struct *pick_next_task_idle(struct rq *rq) { struct task_struct *next = rq->idle; @@ -506,6 +513,7 @@ DEFINE_SCHED_CLASS(idle) = { #ifdef CONFIG_SMP .balance = balance_idle, + .pick_task = pick_task_idle, .select_task_rq = select_task_rq_idle, .set_cpus_allowed = set_cpus_allowed_common, #endif diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index b3d39c3d3ab34..a5254471371c2 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1626,7 +1626,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) return rt_task_of(rt_se); } -static struct task_struct *pick_next_task_rt(struct rq *rq) +static struct task_struct *pick_task_rt(struct rq *rq) { struct task_struct *p; @@ -1634,7 +1634,17 @@ static struct task_struct *pick_next_task_rt(struct rq *rq) return NULL; p = _pick_next_task_rt(rq); - set_next_task_rt(rq, p, true); + + return p; +} + +static struct task_struct *pick_next_task_rt(struct rq *rq) +{ + struct task_struct *p = pick_task_rt(rq); + + if (p) + set_next_task_rt(rq, p, true); + return p; } @@ -2483,6 +2493,7 @@ DEFINE_SCHED_CLASS(rt) = { #ifdef CONFIG_SMP .balance = balance_rt, + .pick_task = pick_task_rt, .select_task_rq = select_task_rq_rt, .set_cpus_allowed = set_cpus_allowed_common, .rq_online = rq_online_rt, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ca30af37b9060..fa990cd259ceb 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1953,6 +1953,9 @@ struct sched_class { #ifdef CONFIG_SMP int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf); int (*select_task_rq)(struct task_struct *p, int task_cpu, int flags); + + struct task_struct * (*pick_task)(struct rq *rq); + void (*migrate_task_rq)(struct task_struct *p, int new_cpu); void (*task_woken)(struct rq *this_rq, struct task_struct *task); diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index 55f39125c0e1c..f988ebe3febb9 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -34,15 +34,24 @@ static void set_next_task_stop(struct rq *rq, struct task_struct *stop, bool fir stop->se.exec_start = rq_clock_task(rq); } -static struct task_struct *pick_next_task_stop(struct rq *rq) +static struct task_struct *pick_task_stop(struct rq *rq) { if (!sched_stop_runnable(rq)) return NULL; - set_next_task_stop(rq, rq->stop, true); return rq->stop; } +static struct task_struct *pick_next_task_stop(struct rq *rq) +{ + struct task_struct *p = pick_task_stop(rq); + + if (p) + set_next_task_stop(rq, p, true); + + return p; +} + static void enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags) { @@ -123,6 +132,7 @@ DEFINE_SCHED_CLASS(stop) = { #ifdef CONFIG_SMP .balance = balance_stop, + .pick_task = pick_task_stop, .select_task_rq = select_task_rq_stop, .set_cpus_allowed = set_cpus_allowed_common, #endif -- GitLab From 8a311c740b53324ec584e0e3bb7077d56b123c28 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Nov 2020 18:19:36 -0500 Subject: [PATCH 0449/3804] sched: Basic tracking of matching tasks Introduce task_struct::core_cookie as an opaque identifier for core scheduling. When enabled; core scheduling will only allow matching task to be on the core; where idle matches everything. When task_struct::core_cookie is set (and core scheduling is enabled) these tasks are indexed in a second RB-tree, first on cookie value then on scheduling function, such that matching task selection always finds the most elegible match. NOTE: *shudder* at the overhead... NOTE: *sigh*, a 3rd copy of the scheduling function; the alternative is per class tracking of cookies and that just duplicates a lot of stuff for no raisin (the 2nd copy lives in the rt-mutex PI code). [Joel: folded fixes] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Joel Fernandes (Google) Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.496975854@infradead.org --- include/linux/sched.h | 8 ++- kernel/sched/core.c | 152 ++++++++++++++++++++++++++++++++++++++++-- kernel/sched/fair.c | 46 ------------- kernel/sched/sched.h | 55 +++++++++++++++ 4 files changed, 210 insertions(+), 51 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index d2c881384517b..45eedccf86aae 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -700,10 +700,16 @@ struct task_struct { const struct sched_class *sched_class; struct sched_entity se; struct sched_rt_entity rt; + struct sched_dl_entity dl; + +#ifdef CONFIG_SCHED_CORE + struct rb_node core_node; + unsigned long core_cookie; +#endif + #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; #endif - struct sched_dl_entity dl; #ifdef CONFIG_UCLAMP_TASK /* diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 85147bea9d93d..c057d471c025d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -88,6 +88,133 @@ __read_mostly int scheduler_running; DEFINE_STATIC_KEY_FALSE(__sched_core_enabled); +/* kernel prio, less is more */ +static inline int __task_prio(struct task_struct *p) +{ + if (p->sched_class == &stop_sched_class) /* trumps deadline */ + return -2; + + if (rt_prio(p->prio)) /* includes deadline */ + return p->prio; /* [-1, 99] */ + + if (p->sched_class == &idle_sched_class) + return MAX_RT_PRIO + NICE_WIDTH; /* 140 */ + + return MAX_RT_PRIO + MAX_NICE; /* 120, squash fair */ +} + +/* + * l(a,b) + * le(a,b) := !l(b,a) + * g(a,b) := l(b,a) + * ge(a,b) := !l(a,b) + */ + +/* real prio, less is less */ +static inline bool prio_less(struct task_struct *a, struct task_struct *b) +{ + + int pa = __task_prio(a), pb = __task_prio(b); + + if (-pa < -pb) + return true; + + if (-pb < -pa) + return false; + + if (pa == -1) /* dl_prio() doesn't work because of stop_class above */ + return !dl_time_before(a->dl.deadline, b->dl.deadline); + + if (pa == MAX_RT_PRIO + MAX_NICE) { /* fair */ + u64 vruntime = b->se.vruntime; + + /* + * Normalize the vruntime if tasks are in different cpus. + */ + if (task_cpu(a) != task_cpu(b)) { + vruntime -= task_cfs_rq(b)->min_vruntime; + vruntime += task_cfs_rq(a)->min_vruntime; + } + + return !((s64)(a->se.vruntime - vruntime) <= 0); + } + + return false; +} + +static inline bool __sched_core_less(struct task_struct *a, struct task_struct *b) +{ + if (a->core_cookie < b->core_cookie) + return true; + + if (a->core_cookie > b->core_cookie) + return false; + + /* flip prio, so high prio is leftmost */ + if (prio_less(b, a)) + return true; + + return false; +} + +#define __node_2_sc(node) rb_entry((node), struct task_struct, core_node) + +static inline bool rb_sched_core_less(struct rb_node *a, const struct rb_node *b) +{ + return __sched_core_less(__node_2_sc(a), __node_2_sc(b)); +} + +static inline int rb_sched_core_cmp(const void *key, const struct rb_node *node) +{ + const struct task_struct *p = __node_2_sc(node); + unsigned long cookie = (unsigned long)key; + + if (cookie < p->core_cookie) + return -1; + + if (cookie > p->core_cookie) + return 1; + + return 0; +} + +static void sched_core_enqueue(struct rq *rq, struct task_struct *p) +{ + rq->core->core_task_seq++; + + if (!p->core_cookie) + return; + + rb_add(&p->core_node, &rq->core_tree, rb_sched_core_less); +} + +static void sched_core_dequeue(struct rq *rq, struct task_struct *p) +{ + rq->core->core_task_seq++; + + if (!p->core_cookie) + return; + + rb_erase(&p->core_node, &rq->core_tree); +} + +/* + * Find left-most (aka, highest priority) task matching @cookie. + */ +static struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie) +{ + struct rb_node *node; + + node = rb_find_first((void *)cookie, &rq->core_tree, rb_sched_core_cmp); + /* + * The idle task always matches any cookie! + */ + if (!node) + return idle_sched_class.pick_task(rq); + + return __node_2_sc(node); +} + /* * Magic required such that: * @@ -147,10 +274,16 @@ static void __sched_core_flip(bool enabled) cpus_read_unlock(); } -static void __sched_core_enable(void) +static void sched_core_assert_empty(void) { - // XXX verify there are no cookie tasks (yet) + int cpu; + for_each_possible_cpu(cpu) + WARN_ON_ONCE(!RB_EMPTY_ROOT(&cpu_rq(cpu)->core_tree)); +} + +static void __sched_core_enable(void) +{ static_branch_enable(&__sched_core_enabled); /* * Ensure all previous instances of raw_spin_rq_*lock() have finished @@ -158,12 +291,12 @@ static void __sched_core_enable(void) */ synchronize_rcu(); __sched_core_flip(true); + sched_core_assert_empty(); } static void __sched_core_disable(void) { - // XXX verify there are no cookie tasks (left) - + sched_core_assert_empty(); __sched_core_flip(false); static_branch_disable(&__sched_core_enabled); } @@ -205,6 +338,11 @@ void sched_core_put(void) schedule_work(&_work); } +#else /* !CONFIG_SCHED_CORE */ + +static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { } +static inline void sched_core_dequeue(struct rq *rq, struct task_struct *p) { } + #endif /* CONFIG_SCHED_CORE */ /* @@ -1797,10 +1935,16 @@ static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags) uclamp_rq_inc(rq, p); p->sched_class->enqueue_task(rq, p, flags); + + if (sched_core_enabled(rq)) + sched_core_enqueue(rq, p); } static inline void dequeue_task(struct rq *rq, struct task_struct *p, int flags) { + if (sched_core_enabled(rq)) + sched_core_dequeue(rq, p); + if (!(flags & DEQUEUE_NOCLOCK)) update_rq_clock(rq); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 51d72ab5b5ae3..08be7a2eb05b4 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -268,33 +268,11 @@ const struct sched_class fair_sched_class; */ #ifdef CONFIG_FAIR_GROUP_SCHED -static inline struct task_struct *task_of(struct sched_entity *se) -{ - SCHED_WARN_ON(!entity_is_task(se)); - return container_of(se, struct task_struct, se); -} /* Walk up scheduling entities hierarchy */ #define for_each_sched_entity(se) \ for (; se; se = se->parent) -static inline struct cfs_rq *task_cfs_rq(struct task_struct *p) -{ - return p->se.cfs_rq; -} - -/* runqueue on which this entity is (to be) queued */ -static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se) -{ - return se->cfs_rq; -} - -/* runqueue "owned" by this group */ -static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) -{ - return grp->my_q; -} - static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len) { if (!path) @@ -455,33 +433,9 @@ find_matching_se(struct sched_entity **se, struct sched_entity **pse) #else /* !CONFIG_FAIR_GROUP_SCHED */ -static inline struct task_struct *task_of(struct sched_entity *se) -{ - return container_of(se, struct task_struct, se); -} - #define for_each_sched_entity(se) \ for (; se; se = NULL) -static inline struct cfs_rq *task_cfs_rq(struct task_struct *p) -{ - return &task_rq(p)->cfs; -} - -static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se) -{ - struct task_struct *p = task_of(se); - struct rq *rq = task_rq(p); - - return &rq->cfs; -} - -/* runqueue "owned" by this group */ -static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) -{ - return NULL; -} - static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len) { if (path) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index fa990cd259ceb..e43a2176d88f3 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1080,6 +1080,10 @@ struct rq { /* per rq */ struct rq *core; unsigned int core_enabled; + struct rb_root core_tree; + + /* shared state */ + unsigned int core_task_seq; #endif }; @@ -1243,6 +1247,57 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); #define cpu_curr(cpu) (cpu_rq(cpu)->curr) #define raw_rq() raw_cpu_ptr(&runqueues) +#ifdef CONFIG_FAIR_GROUP_SCHED +static inline struct task_struct *task_of(struct sched_entity *se) +{ + SCHED_WARN_ON(!entity_is_task(se)); + return container_of(se, struct task_struct, se); +} + +static inline struct cfs_rq *task_cfs_rq(struct task_struct *p) +{ + return p->se.cfs_rq; +} + +/* runqueue on which this entity is (to be) queued */ +static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se) +{ + return se->cfs_rq; +} + +/* runqueue "owned" by this group */ +static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) +{ + return grp->my_q; +} + +#else + +static inline struct task_struct *task_of(struct sched_entity *se) +{ + return container_of(se, struct task_struct, se); +} + +static inline struct cfs_rq *task_cfs_rq(struct task_struct *p) +{ + return &task_rq(p)->cfs; +} + +static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se) +{ + struct task_struct *p = task_of(se); + struct rq *rq = task_rq(p); + + return &rq->cfs; +} + +/* runqueue "owned" by this group */ +static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp) +{ + return NULL; +} +#endif + extern void update_rq_clock(struct rq *rq); static inline u64 __rq_clock_broken(struct rq *rq) -- GitLab From 539f65125d20aacab54d02d77f10a839f45b09dc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Nov 2020 18:19:37 -0500 Subject: [PATCH 0450/3804] sched: Add core wide task selection and scheduling Instead of only selecting a local task, select a task for all SMT siblings for every reschedule on the core (irrespective which logical CPU does the reschedule). Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.557559654@infradead.org --- kernel/sched/core.c | 301 ++++++++++++++++++++++++++++++++++++++++++- kernel/sched/sched.h | 6 +- 2 files changed, 305 insertions(+), 2 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c057d471c025d..db763f42a4b0f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5282,7 +5282,7 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev, * Pick up the highest-prio task: */ static inline struct task_struct * -pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) +__pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) { const struct sched_class *class; struct task_struct *p; @@ -5323,6 +5323,294 @@ restart: } #ifdef CONFIG_SCHED_CORE +static inline bool is_task_rq_idle(struct task_struct *t) +{ + return (task_rq(t)->idle == t); +} + +static inline bool cookie_equals(struct task_struct *a, unsigned long cookie) +{ + return is_task_rq_idle(a) || (a->core_cookie == cookie); +} + +static inline bool cookie_match(struct task_struct *a, struct task_struct *b) +{ + if (is_task_rq_idle(a) || is_task_rq_idle(b)) + return true; + + return a->core_cookie == b->core_cookie; +} + +// XXX fairness/fwd progress conditions +/* + * Returns + * - NULL if there is no runnable task for this class. + * - the highest priority task for this runqueue if it matches + * rq->core->core_cookie or its priority is greater than max. + * - Else returns idle_task. + */ +static struct task_struct * +pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *max) +{ + struct task_struct *class_pick, *cookie_pick; + unsigned long cookie = rq->core->core_cookie; + + class_pick = class->pick_task(rq); + if (!class_pick) + return NULL; + + if (!cookie) { + /* + * If class_pick is tagged, return it only if it has + * higher priority than max. + */ + if (max && class_pick->core_cookie && + prio_less(class_pick, max)) + return idle_sched_class.pick_task(rq); + + return class_pick; + } + + /* + * If class_pick is idle or matches cookie, return early. + */ + if (cookie_equals(class_pick, cookie)) + return class_pick; + + cookie_pick = sched_core_find(rq, cookie); + + /* + * If class > max && class > cookie, it is the highest priority task on + * the core (so far) and it must be selected, otherwise we must go with + * the cookie pick in order to satisfy the constraint. + */ + if (prio_less(cookie_pick, class_pick) && + (!max || prio_less(max, class_pick))) + return class_pick; + + return cookie_pick; +} + +static struct task_struct * +pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) +{ + struct task_struct *next, *max = NULL; + const struct sched_class *class; + const struct cpumask *smt_mask; + bool need_sync; + int i, j, cpu; + + if (!sched_core_enabled(rq)) + return __pick_next_task(rq, prev, rf); + + cpu = cpu_of(rq); + + /* Stopper task is switching into idle, no need core-wide selection. */ + if (cpu_is_offline(cpu)) { + /* + * Reset core_pick so that we don't enter the fastpath when + * coming online. core_pick would already be migrated to + * another cpu during offline. + */ + rq->core_pick = NULL; + return __pick_next_task(rq, prev, rf); + } + + /* + * If there were no {en,de}queues since we picked (IOW, the task + * pointers are all still valid), and we haven't scheduled the last + * pick yet, do so now. + * + * rq->core_pick can be NULL if no selection was made for a CPU because + * it was either offline or went offline during a sibling's core-wide + * selection. In this case, do a core-wide selection. + */ + if (rq->core->core_pick_seq == rq->core->core_task_seq && + rq->core->core_pick_seq != rq->core_sched_seq && + rq->core_pick) { + WRITE_ONCE(rq->core_sched_seq, rq->core->core_pick_seq); + + next = rq->core_pick; + if (next != prev) { + put_prev_task(rq, prev); + set_next_task(rq, next); + } + + rq->core_pick = NULL; + return next; + } + + put_prev_task_balance(rq, prev, rf); + + smt_mask = cpu_smt_mask(cpu); + + /* + * core->core_task_seq, core->core_pick_seq, rq->core_sched_seq + * + * @task_seq guards the task state ({en,de}queues) + * @pick_seq is the @task_seq we did a selection on + * @sched_seq is the @pick_seq we scheduled + * + * However, preemptions can cause multiple picks on the same task set. + * 'Fix' this by also increasing @task_seq for every pick. + */ + rq->core->core_task_seq++; + need_sync = !!rq->core->core_cookie; + + /* reset state */ + rq->core->core_cookie = 0UL; + for_each_cpu(i, smt_mask) { + struct rq *rq_i = cpu_rq(i); + + rq_i->core_pick = NULL; + + if (rq_i->core_forceidle) { + need_sync = true; + rq_i->core_forceidle = false; + } + + if (i != cpu) + update_rq_clock(rq_i); + } + + /* + * Try and select tasks for each sibling in decending sched_class + * order. + */ + for_each_class(class) { +again: + for_each_cpu_wrap(i, smt_mask, cpu) { + struct rq *rq_i = cpu_rq(i); + struct task_struct *p; + + if (rq_i->core_pick) + continue; + + /* + * If this sibling doesn't yet have a suitable task to + * run; ask for the most elegible task, given the + * highest priority task already selected for this + * core. + */ + p = pick_task(rq_i, class, max); + if (!p) { + /* + * If there weren't no cookies; we don't need to + * bother with the other siblings. + * If the rest of the core is not running a tagged + * task, i.e. need_sync == 0, and the current CPU + * which called into the schedule() loop does not + * have any tasks for this class, skip selecting for + * other siblings since there's no point. We don't skip + * for RT/DL because that could make CFS force-idle RT. + */ + if (i == cpu && !need_sync && class == &fair_sched_class) + goto next_class; + + continue; + } + + /* + * Optimize the 'normal' case where there aren't any + * cookies and we don't need to sync up. + */ + if (i == cpu && !need_sync && !p->core_cookie) { + next = p; + goto done; + } + + rq_i->core_pick = p; + + /* + * If this new candidate is of higher priority than the + * previous; and they're incompatible; we need to wipe + * the slate and start over. pick_task makes sure that + * p's priority is more than max if it doesn't match + * max's cookie. + * + * NOTE: this is a linear max-filter and is thus bounded + * in execution time. + */ + if (!max || !cookie_match(max, p)) { + struct task_struct *old_max = max; + + rq->core->core_cookie = p->core_cookie; + max = p; + + if (old_max) { + for_each_cpu(j, smt_mask) { + if (j == i) + continue; + + cpu_rq(j)->core_pick = NULL; + } + goto again; + } else { + /* + * Once we select a task for a cpu, we + * should not be doing an unconstrained + * pick because it might starve a task + * on a forced idle cpu. + */ + need_sync = true; + } + + } + } +next_class:; + } + + rq->core->core_pick_seq = rq->core->core_task_seq; + next = rq->core_pick; + rq->core_sched_seq = rq->core->core_pick_seq; + + /* Something should have been selected for current CPU */ + WARN_ON_ONCE(!next); + + /* + * Reschedule siblings + * + * NOTE: L1TF -- at this point we're no longer running the old task and + * sending an IPI (below) ensures the sibling will no longer be running + * their task. This ensures there is no inter-sibling overlap between + * non-matching user state. + */ + for_each_cpu(i, smt_mask) { + struct rq *rq_i = cpu_rq(i); + + /* + * An online sibling might have gone offline before a task + * could be picked for it, or it might be offline but later + * happen to come online, but its too late and nothing was + * picked for it. That's Ok - it will pick tasks for itself, + * so ignore it. + */ + if (!rq_i->core_pick) + continue; + + if (is_task_rq_idle(rq_i->core_pick) && rq_i->nr_running) + rq_i->core_forceidle = true; + + if (i == cpu) { + rq_i->core_pick = NULL; + continue; + } + + /* Did we break L1TF mitigation requirements? */ + WARN_ON_ONCE(!cookie_match(next, rq_i->core_pick)); + + if (rq_i->curr == rq_i->core_pick) { + rq_i->core_pick = NULL; + continue; + } + + resched_curr(rq_i); + } + +done: + set_next_task(rq, next); + return next; +} static inline void sched_core_cpu_starting(unsigned int cpu) { @@ -5354,6 +5642,12 @@ static inline void sched_core_cpu_starting(unsigned int cpu) static inline void sched_core_cpu_starting(unsigned int cpu) {} +static struct task_struct * +pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) +{ + return __pick_next_task(rq, prev, rf); +} + #endif /* CONFIG_SCHED_CORE */ /* @@ -8609,7 +8903,12 @@ void __init sched_init(void) #ifdef CONFIG_SCHED_CORE rq->core = NULL; + rq->core_pick = NULL; rq->core_enabled = 0; + rq->core_tree = RB_ROOT; + rq->core_forceidle = false; + + rq->core_cookie = 0UL; #endif } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e43a2176d88f3..dd44a3127e9ca 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1079,11 +1079,16 @@ struct rq { #ifdef CONFIG_SCHED_CORE /* per rq */ struct rq *core; + struct task_struct *core_pick; unsigned int core_enabled; + unsigned int core_sched_seq; struct rb_root core_tree; + unsigned char core_forceidle; /* shared state */ unsigned int core_task_seq; + unsigned int core_pick_seq; + unsigned long core_cookie; #endif }; @@ -2060,7 +2065,6 @@ static inline void put_prev_task(struct rq *rq, struct task_struct *prev) static inline void set_next_task(struct rq *rq, struct task_struct *next) { - WARN_ON_ONCE(rq->curr != next); next->sched_class->set_next_task(rq, next, false); } -- GitLab From 8039e96fcc1de30d5bcaf05da9ca2de46a800826 Mon Sep 17 00:00:00 2001 From: Vineeth Pillai Date: Tue, 17 Nov 2020 18:19:38 -0500 Subject: [PATCH 0451/3804] sched/fair: Fix forced idle sibling starvation corner case If there is only one long running local task and the sibling is forced idle, it might not get a chance to run until a schedule event happens on any cpu in the core. So we check for this condition during a tick to see if a sibling is starved and then give it a chance to schedule. Signed-off-by: Vineeth Pillai Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.617407840@infradead.org --- kernel/sched/core.c | 15 ++++++++------- kernel/sched/fair.c | 40 ++++++++++++++++++++++++++++++++++++++++ kernel/sched/sched.h | 2 +- 3 files changed, 49 insertions(+), 8 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index db763f42a4b0f..f5e1e6f96411b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5459,16 +5459,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) /* reset state */ rq->core->core_cookie = 0UL; + if (rq->core->core_forceidle) { + need_sync = true; + rq->core->core_forceidle = false; + } for_each_cpu(i, smt_mask) { struct rq *rq_i = cpu_rq(i); rq_i->core_pick = NULL; - if (rq_i->core_forceidle) { - need_sync = true; - rq_i->core_forceidle = false; - } - if (i != cpu) update_rq_clock(rq_i); } @@ -5588,8 +5587,10 @@ next_class:; if (!rq_i->core_pick) continue; - if (is_task_rq_idle(rq_i->core_pick) && rq_i->nr_running) - rq_i->core_forceidle = true; + if (is_task_rq_idle(rq_i->core_pick) && rq_i->nr_running && + !rq_i->core->core_forceidle) { + rq_i->core->core_forceidle = true; + } if (i == cpu) { rq_i->core_pick = NULL; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 08be7a2eb05b4..4d1ecab41e804 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -10767,6 +10767,44 @@ static void rq_offline_fair(struct rq *rq) #endif /* CONFIG_SMP */ +#ifdef CONFIG_SCHED_CORE +static inline bool +__entity_slice_used(struct sched_entity *se, int min_nr_tasks) +{ + u64 slice = sched_slice(cfs_rq_of(se), se); + u64 rtime = se->sum_exec_runtime - se->prev_sum_exec_runtime; + + return (rtime * min_nr_tasks > slice); +} + +#define MIN_NR_TASKS_DURING_FORCEIDLE 2 +static inline void task_tick_core(struct rq *rq, struct task_struct *curr) +{ + if (!sched_core_enabled(rq)) + return; + + /* + * If runqueue has only one task which used up its slice and + * if the sibling is forced idle, then trigger schedule to + * give forced idle task a chance. + * + * sched_slice() considers only this active rq and it gets the + * whole slice. But during force idle, we have siblings acting + * like a single runqueue and hence we need to consider runnable + * tasks on this cpu and the forced idle cpu. Ideally, we should + * go through the forced idle rq, but that would be a perf hit. + * We can assume that the forced idle cpu has atleast + * MIN_NR_TASKS_DURING_FORCEIDLE - 1 tasks and use that to check + * if we need to give up the cpu. + */ + if (rq->core->core_forceidle && rq->cfs.nr_running == 1 && + __entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE)) + resched_curr(rq); +} +#else +static inline void task_tick_core(struct rq *rq, struct task_struct *curr) {} +#endif + /* * scheduler tick hitting a task of our scheduling class. * @@ -10790,6 +10828,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued) update_misfit_status(curr, rq); update_overutilized_status(task_rq(curr)); + + task_tick_core(rq, curr); } /* diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index dd44a3127e9ca..db555143380d3 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1083,12 +1083,12 @@ struct rq { unsigned int core_enabled; unsigned int core_sched_seq; struct rb_root core_tree; - unsigned char core_forceidle; /* shared state */ unsigned int core_task_seq; unsigned int core_pick_seq; unsigned long core_cookie; + unsigned char core_forceidle; #endif }; -- GitLab From 7afbba119f0da09824d723f8081608ea1f74ff57 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Tue, 17 Nov 2020 18:19:42 -0500 Subject: [PATCH 0452/3804] sched: Fix priority inversion of cookied task with sibling The rationale is as follows. In the core-wide pick logic, even if need_sync == false, we need to go look at other CPUs (non-local CPUs) to see if they could be running RT. Say the RQs in a particular core look like this: Let CFS1 and CFS2 be 2 tagged CFS tags. Let RT1 be an untagged RT task. rq0 rq1 CFS1 (tagged) RT1 (no tag) CFS2 (tagged) Say schedule() runs on rq0. Now, it will enter the above loop and pick_task(RT) will return NULL for 'p'. It will enter the above if() block and see that need_sync == false and will skip RT entirely. The end result of the selection will be (say prio(CFS1) > prio(CFS2)): rq0 rq1 CFS1 IDLE When it should have selected: rq0 rq1 IDLE RT Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Joel Fernandes (Google) Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.678425748@infradead.org --- kernel/sched/core.c | 65 ++++++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 39 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f5e1e6f96411b..e506d9de16fcc 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5443,6 +5443,15 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) put_prev_task_balance(rq, prev, rf); smt_mask = cpu_smt_mask(cpu); + need_sync = !!rq->core->core_cookie; + + /* reset state */ + rq->core->core_cookie = 0UL; + if (rq->core->core_forceidle) { + need_sync = true; + fi_before = true; + rq->core->core_forceidle = false; + } /* * core->core_task_seq, core->core_pick_seq, rq->core_sched_seq @@ -5455,14 +5464,25 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) * 'Fix' this by also increasing @task_seq for every pick. */ rq->core->core_task_seq++; - need_sync = !!rq->core->core_cookie; - /* reset state */ - rq->core->core_cookie = 0UL; - if (rq->core->core_forceidle) { + /* + * Optimize for common case where this CPU has no cookies + * and there are no cookied tasks running on siblings. + */ + if (!need_sync) { + for_each_class(class) { + next = class->pick_task(rq); + if (next) + break; + } + + if (!next->core_cookie) { + rq->core_pick = NULL; + goto done; + } need_sync = true; - rq->core->core_forceidle = false; } + for_each_cpu(i, smt_mask) { struct rq *rq_i = cpu_rq(i); @@ -5492,31 +5512,8 @@ again: * core. */ p = pick_task(rq_i, class, max); - if (!p) { - /* - * If there weren't no cookies; we don't need to - * bother with the other siblings. - * If the rest of the core is not running a tagged - * task, i.e. need_sync == 0, and the current CPU - * which called into the schedule() loop does not - * have any tasks for this class, skip selecting for - * other siblings since there's no point. We don't skip - * for RT/DL because that could make CFS force-idle RT. - */ - if (i == cpu && !need_sync && class == &fair_sched_class) - goto next_class; - + if (!p) continue; - } - - /* - * Optimize the 'normal' case where there aren't any - * cookies and we don't need to sync up. - */ - if (i == cpu && !need_sync && !p->core_cookie) { - next = p; - goto done; - } rq_i->core_pick = p; @@ -5544,19 +5541,9 @@ again: cpu_rq(j)->core_pick = NULL; } goto again; - } else { - /* - * Once we select a task for a cpu, we - * should not be doing an unconstrained - * pick because it might starve a task - * on a forced idle cpu. - */ - need_sync = true; } - } } -next_class:; } rq->core->core_pick_seq = rq->core->core_task_seq; -- GitLab From c6047c2e3af68dae23ad884249e0d42ff28d2d1b Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Tue, 17 Nov 2020 18:19:39 -0500 Subject: [PATCH 0453/3804] sched/fair: Snapshot the min_vruntime of CPUs on force idle During force-idle, we end up doing cross-cpu comparison of vruntimes during pick_next_task. If we simply compare (vruntime-min_vruntime) across CPUs, and if the CPUs only have 1 task each, we will always end up comparing 0 with 0 and pick just one of the tasks all the time. This starves the task that was not picked. To fix this, take a snapshot of the min_vruntime when entering force idle and use it for comparison. This min_vruntime snapshot will only be used for cross-CPU vruntime comparison, and nothing else. A note about the min_vruntime snapshot and force idling: During selection: When we're not fi, we need to update snapshot. when we're fi and we were not fi, we must update snapshot. When we're fi and we were already fi, we must not update snapshot. Which gives: fib fi update 0 0 1 0 1 1 1 0 1 1 1 0 Where: fi: force-idled now fib: force-idled before So the min_vruntime snapshot needs to be updated when: !(fib && fi). Also, the cfs_prio_less() function needs to be aware of whether the core is in force idle or not, since it will be use this information to know whether to advance a cfs_rq's min_vruntime_fi in the hierarchy. So pass this information along via pick_task() -> prio_less(). Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Joel Fernandes (Google) Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.738542617@infradead.org --- kernel/sched/core.c | 59 +++++++++++++++++++--------------- kernel/sched/fair.c | 75 ++++++++++++++++++++++++++++++++++++++++++++ kernel/sched/sched.h | 8 +++++ 3 files changed, 117 insertions(+), 25 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e506d9de16fcc..e45c1d21b3714 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -111,7 +111,7 @@ static inline int __task_prio(struct task_struct *p) */ /* real prio, less is less */ -static inline bool prio_less(struct task_struct *a, struct task_struct *b) +static inline bool prio_less(struct task_struct *a, struct task_struct *b, bool in_fi) { int pa = __task_prio(a), pb = __task_prio(b); @@ -125,19 +125,8 @@ static inline bool prio_less(struct task_struct *a, struct task_struct *b) if (pa == -1) /* dl_prio() doesn't work because of stop_class above */ return !dl_time_before(a->dl.deadline, b->dl.deadline); - if (pa == MAX_RT_PRIO + MAX_NICE) { /* fair */ - u64 vruntime = b->se.vruntime; - - /* - * Normalize the vruntime if tasks are in different cpus. - */ - if (task_cpu(a) != task_cpu(b)) { - vruntime -= task_cfs_rq(b)->min_vruntime; - vruntime += task_cfs_rq(a)->min_vruntime; - } - - return !((s64)(a->se.vruntime - vruntime) <= 0); - } + if (pa == MAX_RT_PRIO + MAX_NICE) /* fair */ + return cfs_prio_less(a, b, in_fi); return false; } @@ -151,7 +140,7 @@ static inline bool __sched_core_less(struct task_struct *a, struct task_struct * return false; /* flip prio, so high prio is leftmost */ - if (prio_less(b, a)) + if (prio_less(b, a, task_rq(a)->core->core_forceidle)) return true; return false; @@ -5350,7 +5339,7 @@ static inline bool cookie_match(struct task_struct *a, struct task_struct *b) * - Else returns idle_task. */ static struct task_struct * -pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *max) +pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *max, bool in_fi) { struct task_struct *class_pick, *cookie_pick; unsigned long cookie = rq->core->core_cookie; @@ -5365,7 +5354,7 @@ pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *ma * higher priority than max. */ if (max && class_pick->core_cookie && - prio_less(class_pick, max)) + prio_less(class_pick, max, in_fi)) return idle_sched_class.pick_task(rq); return class_pick; @@ -5384,19 +5373,22 @@ pick_task(struct rq *rq, const struct sched_class *class, struct task_struct *ma * the core (so far) and it must be selected, otherwise we must go with * the cookie pick in order to satisfy the constraint. */ - if (prio_less(cookie_pick, class_pick) && - (!max || prio_less(max, class_pick))) + if (prio_less(cookie_pick, class_pick, in_fi) && + (!max || prio_less(max, class_pick, in_fi))) return class_pick; return cookie_pick; } +extern void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi); + static struct task_struct * pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) { struct task_struct *next, *max = NULL; const struct sched_class *class; const struct cpumask *smt_mask; + bool fi_before = false; bool need_sync; int i, j, cpu; @@ -5478,9 +5470,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) if (!next->core_cookie) { rq->core_pick = NULL; + /* + * For robustness, update the min_vruntime_fi for + * unconstrained picks as well. + */ + WARN_ON_ONCE(fi_before); + task_vruntime_update(rq, next, false); goto done; } - need_sync = true; } for_each_cpu(i, smt_mask) { @@ -5511,11 +5508,16 @@ again: * highest priority task already selected for this * core. */ - p = pick_task(rq_i, class, max); + p = pick_task(rq_i, class, max, fi_before); if (!p) continue; rq_i->core_pick = p; + if (rq_i->idle == p && rq_i->nr_running) { + rq->core->core_forceidle = true; + if (!fi_before) + rq->core->core_forceidle_seq++; + } /* * If this new candidate is of higher priority than the @@ -5534,6 +5536,7 @@ again: max = p; if (old_max) { + rq->core->core_forceidle = false; for_each_cpu(j, smt_mask) { if (j == i) continue; @@ -5574,10 +5577,16 @@ again: if (!rq_i->core_pick) continue; - if (is_task_rq_idle(rq_i->core_pick) && rq_i->nr_running && - !rq_i->core->core_forceidle) { - rq_i->core->core_forceidle = true; - } + /* + * Update for new !FI->FI transitions, or if continuing to be in !FI: + * fi_before fi update? + * 0 0 1 + * 0 1 1 + * 1 0 1 + * 1 1 0 + */ + if (!(fi_before && rq->core->core_forceidle)) + task_vruntime_update(rq_i, rq_i->core_pick, rq->core->core_forceidle); if (i == cpu) { rq_i->core_pick = NULL; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4d1ecab41e804..5948dc17b9ccb 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -10801,6 +10801,81 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr) __entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE)) resched_curr(rq); } + +/* + * se_fi_update - Update the cfs_rq->min_vruntime_fi in a CFS hierarchy if needed. + */ +static void se_fi_update(struct sched_entity *se, unsigned int fi_seq, bool forceidle) +{ + for_each_sched_entity(se) { + struct cfs_rq *cfs_rq = cfs_rq_of(se); + + if (forceidle) { + if (cfs_rq->forceidle_seq == fi_seq) + break; + cfs_rq->forceidle_seq = fi_seq; + } + + cfs_rq->min_vruntime_fi = cfs_rq->min_vruntime; + } +} + +void task_vruntime_update(struct rq *rq, struct task_struct *p, bool in_fi) +{ + struct sched_entity *se = &p->se; + + if (p->sched_class != &fair_sched_class) + return; + + se_fi_update(se, rq->core->core_forceidle_seq, in_fi); +} + +bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool in_fi) +{ + struct rq *rq = task_rq(a); + struct sched_entity *sea = &a->se; + struct sched_entity *seb = &b->se; + struct cfs_rq *cfs_rqa; + struct cfs_rq *cfs_rqb; + s64 delta; + + SCHED_WARN_ON(task_rq(b)->core != rq->core); + +#ifdef CONFIG_FAIR_GROUP_SCHED + /* + * Find an se in the hierarchy for tasks a and b, such that the se's + * are immediate siblings. + */ + while (sea->cfs_rq->tg != seb->cfs_rq->tg) { + int sea_depth = sea->depth; + int seb_depth = seb->depth; + + if (sea_depth >= seb_depth) + sea = parent_entity(sea); + if (sea_depth <= seb_depth) + seb = parent_entity(seb); + } + + se_fi_update(sea, rq->core->core_forceidle_seq, in_fi); + se_fi_update(seb, rq->core->core_forceidle_seq, in_fi); + + cfs_rqa = sea->cfs_rq; + cfs_rqb = seb->cfs_rq; +#else + cfs_rqa = &task_rq(a)->cfs; + cfs_rqb = &task_rq(b)->cfs; +#endif + + /* + * Find delta after normalizing se's vruntime with its cfs_rq's + * min_vruntime_fi, which would have been updated in prior calls + * to se_fi_update(). + */ + delta = (s64)(sea->vruntime - seb->vruntime) + + (s64)(cfs_rqb->min_vruntime_fi - cfs_rqa->min_vruntime_fi); + + return delta > 0; +} #else static inline void task_tick_core(struct rq *rq, struct task_struct *curr) {} #endif diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index db555143380d3..4a898abc60ce2 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -526,6 +526,11 @@ struct cfs_rq { u64 exec_clock; u64 min_vruntime; +#ifdef CONFIG_SCHED_CORE + unsigned int forceidle_seq; + u64 min_vruntime_fi; +#endif + #ifndef CONFIG_64BIT u64 min_vruntime_copy; #endif @@ -1089,6 +1094,7 @@ struct rq { unsigned int core_pick_seq; unsigned long core_cookie; unsigned char core_forceidle; + unsigned int core_forceidle_seq; #endif }; @@ -1162,6 +1168,8 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq) return &rq->__lock; } +bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool fi); + #else /* !CONFIG_SCHED_CORE */ static inline bool sched_core_enabled(struct rq *rq) -- GitLab From d2dfa17bc7de67e99685c4d6557837bf801a102c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 17 Nov 2020 18:19:43 -0500 Subject: [PATCH 0454/3804] sched: Trivial forced-newidle balancer When a sibling is forced-idle to match the core-cookie; search for matching tasks to fill the core. rcu_read_unlock() can incur an infrequent deadlock in sched_core_balance(). Fix this by using the RCU-sched flavor instead. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.800048269@infradead.org --- include/linux/sched.h | 1 + kernel/sched/core.c | 130 +++++++++++++++++++++++++++++++++++++++++- kernel/sched/idle.c | 1 + kernel/sched/sched.h | 6 ++ 4 files changed, 137 insertions(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 45eedccf86aae..9b822e3832123 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -705,6 +705,7 @@ struct task_struct { #ifdef CONFIG_SCHED_CORE struct rb_node core_node; unsigned long core_cookie; + unsigned int core_occupation; #endif #ifdef CONFIG_CGROUP_SCHED diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e45c1d21b3714..b4988887510fd 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -204,6 +204,21 @@ static struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie) return __node_2_sc(node); } +static struct task_struct *sched_core_next(struct task_struct *p, unsigned long cookie) +{ + struct rb_node *node = &p->core_node; + + node = rb_next(node); + if (!node) + return NULL; + + p = container_of(node, struct task_struct, core_node); + if (p->core_cookie != cookie) + return NULL; + + return p; +} + /* * Magic required such that: * @@ -5389,8 +5404,8 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) const struct sched_class *class; const struct cpumask *smt_mask; bool fi_before = false; + int i, j, cpu, occ = 0; bool need_sync; - int i, j, cpu; if (!sched_core_enabled(rq)) return __pick_next_task(rq, prev, rf); @@ -5512,6 +5527,9 @@ again: if (!p) continue; + if (!is_task_rq_idle(p)) + occ++; + rq_i->core_pick = p; if (rq_i->idle == p && rq_i->nr_running) { rq->core->core_forceidle = true; @@ -5543,6 +5561,7 @@ again: cpu_rq(j)->core_pick = NULL; } + occ = 1; goto again; } } @@ -5588,6 +5607,8 @@ again: if (!(fi_before && rq->core->core_forceidle)) task_vruntime_update(rq_i, rq_i->core_pick, rq->core->core_forceidle); + rq_i->core_pick->core_occupation = occ; + if (i == cpu) { rq_i->core_pick = NULL; continue; @@ -5609,6 +5630,113 @@ done: return next; } +static bool try_steal_cookie(int this, int that) +{ + struct rq *dst = cpu_rq(this), *src = cpu_rq(that); + struct task_struct *p; + unsigned long cookie; + bool success = false; + + local_irq_disable(); + double_rq_lock(dst, src); + + cookie = dst->core->core_cookie; + if (!cookie) + goto unlock; + + if (dst->curr != dst->idle) + goto unlock; + + p = sched_core_find(src, cookie); + if (p == src->idle) + goto unlock; + + do { + if (p == src->core_pick || p == src->curr) + goto next; + + if (!cpumask_test_cpu(this, &p->cpus_mask)) + goto next; + + if (p->core_occupation > dst->idle->core_occupation) + goto next; + + p->on_rq = TASK_ON_RQ_MIGRATING; + deactivate_task(src, p, 0); + set_task_cpu(p, this); + activate_task(dst, p, 0); + p->on_rq = TASK_ON_RQ_QUEUED; + + resched_curr(dst); + + success = true; + break; + +next: + p = sched_core_next(p, cookie); + } while (p); + +unlock: + double_rq_unlock(dst, src); + local_irq_enable(); + + return success; +} + +static bool steal_cookie_task(int cpu, struct sched_domain *sd) +{ + int i; + + for_each_cpu_wrap(i, sched_domain_span(sd), cpu) { + if (i == cpu) + continue; + + if (need_resched()) + break; + + if (try_steal_cookie(cpu, i)) + return true; + } + + return false; +} + +static void sched_core_balance(struct rq *rq) +{ + struct sched_domain *sd; + int cpu = cpu_of(rq); + + preempt_disable(); + rcu_read_lock(); + raw_spin_rq_unlock_irq(rq); + for_each_domain(cpu, sd) { + if (need_resched()) + break; + + if (steal_cookie_task(cpu, sd)) + break; + } + raw_spin_rq_lock_irq(rq); + rcu_read_unlock(); + preempt_enable(); +} + +static DEFINE_PER_CPU(struct callback_head, core_balance_head); + +void queue_core_balance(struct rq *rq) +{ + if (!sched_core_enabled(rq)) + return; + + if (!rq->core->core_cookie) + return; + + if (!rq->nr_running) /* not forced idle */ + return; + + queue_balance_callback(rq, &per_cpu(core_balance_head, rq->cpu), sched_core_balance); +} + static inline void sched_core_cpu_starting(unsigned int cpu) { const struct cpumask *smt_mask = cpu_smt_mask(cpu); diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 43646e7876d91..912b47aa99d82 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -437,6 +437,7 @@ static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool fir { update_idle_core(rq); schedstat_inc(rq->sched_goidle); + queue_core_balance(rq); } #ifdef CONFIG_SMP diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 4a898abc60ce2..91ca1fee9fec2 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1170,6 +1170,8 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq) bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool fi); +extern void queue_core_balance(struct rq *rq); + #else /* !CONFIG_SCHED_CORE */ static inline bool sched_core_enabled(struct rq *rq) @@ -1192,6 +1194,10 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq) return &rq->__lock; } +static inline void queue_core_balance(struct rq *rq) +{ +} + #endif /* CONFIG_SCHED_CORE */ static inline void lockdep_assert_rq_held(struct rq *rq) -- GitLab From 97886d9dcd86820bdbc1fa73b455982809cbc8c2 Mon Sep 17 00:00:00 2001 From: Aubrey Li Date: Wed, 24 Mar 2021 17:40:13 -0400 Subject: [PATCH 0455/3804] sched: Migration changes for core scheduling - Don't migrate if there is a cookie mismatch Load balance tries to move task from busiest CPU to the destination CPU. When core scheduling is enabled, if the task's cookie does not match with the destination CPU's core cookie, this task may be skipped by this CPU. This mitigates the forced idle time on the destination CPU. - Select cookie matched idle CPU In the fast path of task wakeup, select the first cookie matched idle CPU instead of the first idle CPU. - Find cookie matched idlest CPU In the slow path of task wakeup, find the idlest CPU whose core cookie matches with task's cookie Signed-off-by: Aubrey Li Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.860083871@infradead.org --- kernel/sched/fair.c | 29 ++++++++++++++---- kernel/sched/sched.h | 73 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 6 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 5948dc17b9ccb..2635e10484213 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5889,11 +5889,15 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this /* Traverse only the allowed CPUs */ for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) { + struct rq *rq = cpu_rq(i); + + if (!sched_core_cookie_match(rq, p)) + continue; + if (sched_idle_cpu(i)) return i; if (available_idle_cpu(i)) { - struct rq *rq = cpu_rq(i); struct cpuidle_state *idle = idle_get_state(rq); if (idle && idle->exit_latency < min_exit_latency) { /* @@ -5979,9 +5983,10 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p return new_cpu; } -static inline int __select_idle_cpu(int cpu) +static inline int __select_idle_cpu(int cpu, struct task_struct *p) { - if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) + if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) && + sched_cpu_cookie_match(cpu_rq(cpu), p)) return cpu; return -1; @@ -6051,7 +6056,7 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu int cpu; if (!static_branch_likely(&sched_smt_present)) - return __select_idle_cpu(core); + return __select_idle_cpu(core, p); for_each_cpu(cpu, cpu_smt_mask(core)) { if (!available_idle_cpu(cpu)) { @@ -6107,7 +6112,7 @@ static inline bool test_idle_cores(int cpu, bool def) static inline int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu) { - return __select_idle_cpu(core); + return __select_idle_cpu(core, p); } static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) @@ -6164,7 +6169,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool } else { if (!--nr) return -1; - idle_cpu = __select_idle_cpu(cpu); + idle_cpu = __select_idle_cpu(cpu, p); if ((unsigned int)idle_cpu < nr_cpumask_bits) break; } @@ -7527,6 +7532,14 @@ static int task_hot(struct task_struct *p, struct lb_env *env) if (sysctl_sched_migration_cost == -1) return 1; + + /* + * Don't migrate task if the task's cookie does not match + * with the destination CPU's core cookie. + */ + if (!sched_core_cookie_match(cpu_rq(env->dst_cpu), p)) + return 1; + if (sysctl_sched_migration_cost == 0) return 0; @@ -8857,6 +8870,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) p->cpus_ptr)) continue; + /* Skip over this group if no cookie matched */ + if (!sched_group_cookie_match(cpu_rq(this_cpu), p, group)) + continue; + local_group = cpumask_test_cpu(this_cpu, sched_group_span(group)); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 91ca1fee9fec2..3878386a0a024 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1134,7 +1134,9 @@ static inline bool is_migration_disabled(struct task_struct *p) #endif } +struct sched_group; #ifdef CONFIG_SCHED_CORE +static inline struct cpumask *sched_group_span(struct sched_group *sg); DECLARE_STATIC_KEY_FALSE(__sched_core_enabled); @@ -1170,6 +1172,61 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq) bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool fi); +/* + * Helpers to check if the CPU's core cookie matches with the task's cookie + * when core scheduling is enabled. + * A special case is that the task's cookie always matches with CPU's core + * cookie if the CPU is in an idle core. + */ +static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p) +{ + /* Ignore cookie match if core scheduler is not enabled on the CPU. */ + if (!sched_core_enabled(rq)) + return true; + + return rq->core->core_cookie == p->core_cookie; +} + +static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p) +{ + bool idle_core = true; + int cpu; + + /* Ignore cookie match if core scheduler is not enabled on the CPU. */ + if (!sched_core_enabled(rq)) + return true; + + for_each_cpu(cpu, cpu_smt_mask(cpu_of(rq))) { + if (!available_idle_cpu(cpu)) { + idle_core = false; + break; + } + } + + /* + * A CPU in an idle core is always the best choice for tasks with + * cookies. + */ + return idle_core || rq->core->core_cookie == p->core_cookie; +} + +static inline bool sched_group_cookie_match(struct rq *rq, + struct task_struct *p, + struct sched_group *group) +{ + int cpu; + + /* Ignore cookie match if core scheduler is not enabled on the CPU. */ + if (!sched_core_enabled(rq)) + return true; + + for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) { + if (sched_core_cookie_match(rq, p)) + return true; + } + return false; +} + extern void queue_core_balance(struct rq *rq); #else /* !CONFIG_SCHED_CORE */ @@ -1198,6 +1255,22 @@ static inline void queue_core_balance(struct rq *rq) { } +static inline bool sched_cpu_cookie_match(struct rq *rq, struct task_struct *p) +{ + return true; +} + +static inline bool sched_core_cookie_match(struct rq *rq, struct task_struct *p) +{ + return true; +} + +static inline bool sched_group_cookie_match(struct rq *rq, + struct task_struct *p, + struct sched_group *group) +{ + return true; +} #endif /* CONFIG_SCHED_CORE */ static inline void lockdep_assert_rq_held(struct rq *rq) -- GitLab From 6e33cad0af49336952e5541464bd02f5b5fd433e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Mar 2021 18:55:06 +0100 Subject: [PATCH 0456/3804] sched: Trivial core scheduling cookie management In order to not have to use pid_struct, create a new, smaller, structure to manage task cookies for core scheduling. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.919768100@infradead.org --- include/linux/sched.h | 6 +++ kernel/fork.c | 1 + kernel/sched/Makefile | 1 + kernel/sched/core.c | 7 +-- kernel/sched/core_sched.c | 109 ++++++++++++++++++++++++++++++++++++++ kernel/sched/sched.h | 16 ++++++ 6 files changed, 137 insertions(+), 3 deletions(-) create mode 100644 kernel/sched/core_sched.c diff --git a/include/linux/sched.h b/include/linux/sched.h index 9b822e3832123..eab3f7c4251bf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2179,4 +2179,10 @@ int sched_trace_rq_nr_running(struct rq *rq); const struct cpumask *sched_trace_rd_span(struct root_domain *rd); +#ifdef CONFIG_SCHED_CORE +extern void sched_core_free(struct task_struct *tsk); +#else +static inline void sched_core_free(struct task_struct *tsk) { } +#endif + #endif diff --git a/kernel/fork.c b/kernel/fork.c index dc06afd725cbd..d16c60c9daca0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -742,6 +742,7 @@ void __put_task_struct(struct task_struct *tsk) exit_creds(tsk); delayacct_tsk_free(tsk); put_signal_struct(tsk->signal); + sched_core_free(tsk); if (!profile_handoff_task(tsk)) free_task(tsk); diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 5fc9c9b70862f..978fcfca5871d 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -36,3 +36,4 @@ obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o obj-$(CONFIG_MEMBARRIER) += membarrier.o obj-$(CONFIG_CPU_ISOLATION) += isolation.o obj-$(CONFIG_PSI) += psi.o +obj-$(CONFIG_SCHED_CORE) += core_sched.o diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b4988887510fd..55b2d9399e12a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -167,7 +167,7 @@ static inline int rb_sched_core_cmp(const void *key, const struct rb_node *node) return 0; } -static void sched_core_enqueue(struct rq *rq, struct task_struct *p) +void sched_core_enqueue(struct rq *rq, struct task_struct *p) { rq->core->core_task_seq++; @@ -177,14 +177,15 @@ static void sched_core_enqueue(struct rq *rq, struct task_struct *p) rb_add(&p->core_node, &rq->core_tree, rb_sched_core_less); } -static void sched_core_dequeue(struct rq *rq, struct task_struct *p) +void sched_core_dequeue(struct rq *rq, struct task_struct *p) { rq->core->core_task_seq++; - if (!p->core_cookie) + if (!sched_core_enqueued(p)) return; rb_erase(&p->core_node, &rq->core_tree); + RB_CLEAR_NODE(&p->core_node); } /* diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c new file mode 100644 index 0000000000000..8d0869a9eb8c3 --- /dev/null +++ b/kernel/sched/core_sched.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "sched.h" + +/* + * A simple wrapper around refcount. An allocated sched_core_cookie's + * address is used to compute the cookie of the task. + */ +struct sched_core_cookie { + refcount_t refcnt; +}; + +unsigned long sched_core_alloc_cookie(void) +{ + struct sched_core_cookie *ck = kmalloc(sizeof(*ck), GFP_KERNEL); + if (!ck) + return 0; + + refcount_set(&ck->refcnt, 1); + sched_core_get(); + + return (unsigned long)ck; +} + +void sched_core_put_cookie(unsigned long cookie) +{ + struct sched_core_cookie *ptr = (void *)cookie; + + if (ptr && refcount_dec_and_test(&ptr->refcnt)) { + kfree(ptr); + sched_core_put(); + } +} + +unsigned long sched_core_get_cookie(unsigned long cookie) +{ + struct sched_core_cookie *ptr = (void *)cookie; + + if (ptr) + refcount_inc(&ptr->refcnt); + + return cookie; +} + +/* + * sched_core_update_cookie - replace the cookie on a task + * @p: the task to update + * @cookie: the new cookie + * + * Effectively exchange the task cookie; caller is responsible for lifetimes on + * both ends. + * + * Returns: the old cookie + */ +unsigned long sched_core_update_cookie(struct task_struct *p, unsigned long cookie) +{ + unsigned long old_cookie; + struct rq_flags rf; + struct rq *rq; + bool enqueued; + + rq = task_rq_lock(p, &rf); + + /* + * Since creating a cookie implies sched_core_get(), and we cannot set + * a cookie until after we've created it, similarly, we cannot destroy + * a cookie until after we've removed it, we must have core scheduling + * enabled here. + */ + SCHED_WARN_ON((p->core_cookie || cookie) && !sched_core_enabled(rq)); + + enqueued = sched_core_enqueued(p); + if (enqueued) + sched_core_dequeue(rq, p); + + old_cookie = p->core_cookie; + p->core_cookie = cookie; + + if (enqueued) + sched_core_enqueue(rq, p); + + /* + * If task is currently running, it may not be compatible anymore after + * the cookie change, so enter the scheduler on its CPU to schedule it + * away. + */ + if (task_running(rq, p)) + resched_curr(rq); + + task_rq_unlock(rq, p, &rf); + + return old_cookie; +} + +static unsigned long sched_core_clone_cookie(struct task_struct *p) +{ + unsigned long cookie, flags; + + raw_spin_lock_irqsave(&p->pi_lock, flags); + cookie = sched_core_get_cookie(p->core_cookie); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + + return cookie; +} + +void sched_core_free(struct task_struct *p) +{ + sched_core_put_cookie(p->core_cookie); +} diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 3878386a0a024..904c52b560d16 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1229,6 +1229,22 @@ static inline bool sched_group_cookie_match(struct rq *rq, extern void queue_core_balance(struct rq *rq); +static inline bool sched_core_enqueued(struct task_struct *p) +{ + return !RB_EMPTY_NODE(&p->core_node); +} + +extern void sched_core_enqueue(struct rq *rq, struct task_struct *p); +extern void sched_core_dequeue(struct rq *rq, struct task_struct *p); + +extern void sched_core_get(void); +extern void sched_core_put(void); + +extern unsigned long sched_core_alloc_cookie(void); +extern void sched_core_put_cookie(unsigned long cookie); +extern unsigned long sched_core_get_cookie(unsigned long cookie); +extern unsigned long sched_core_update_cookie(struct task_struct *p, unsigned long cookie); + #else /* !CONFIG_SCHED_CORE */ static inline bool sched_core_enabled(struct rq *rq) -- GitLab From 85dd3f61203c5cfa72b308ff327b5fbf3fc1ce5e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 29 Mar 2021 15:18:35 +0200 Subject: [PATCH 0457/3804] sched: Inherit task cookie on fork() Note that sched_core_fork() is called from under tasklist_lock, and not from sched_fork() earlier. This avoids a few races later. Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123308.980003687@infradead.org --- include/linux/sched.h | 2 ++ kernel/fork.c | 3 +++ kernel/sched/core_sched.c | 6 ++++++ 3 files changed, 11 insertions(+) diff --git a/include/linux/sched.h b/include/linux/sched.h index eab3f7c4251bf..fba47e52e482b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2181,8 +2181,10 @@ const struct cpumask *sched_trace_rd_span(struct root_domain *rd); #ifdef CONFIG_SCHED_CORE extern void sched_core_free(struct task_struct *tsk); +extern void sched_core_fork(struct task_struct *p); #else static inline void sched_core_free(struct task_struct *tsk) { } +static inline void sched_core_fork(struct task_struct *p) { } #endif #endif diff --git a/kernel/fork.c b/kernel/fork.c index d16c60c9daca0..e7fd928fcafe9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2251,6 +2251,8 @@ static __latent_entropy struct task_struct *copy_process( klp_copy_process(p); + sched_core_fork(p); + spin_lock(¤t->sighand->siglock); /* @@ -2338,6 +2340,7 @@ static __latent_entropy struct task_struct *copy_process( return p; bad_fork_cancel_cgroup: + sched_core_free(p); spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); cgroup_cancel_fork(p, args); diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c index 8d0869a9eb8c3..dcbbeaefaaa3a 100644 --- a/kernel/sched/core_sched.c +++ b/kernel/sched/core_sched.c @@ -103,6 +103,12 @@ static unsigned long sched_core_clone_cookie(struct task_struct *p) return cookie; } +void sched_core_fork(struct task_struct *p) +{ + RB_CLEAR_NODE(&p->core_node); + p->core_cookie = sched_core_clone_cookie(current); +} + void sched_core_free(struct task_struct *p) { sched_core_put_cookie(p->core_cookie); -- GitLab From 7ac592aa35a684ff1858fb9ec282886b9e3575ac Mon Sep 17 00:00:00 2001 From: Chris Hyser Date: Wed, 24 Mar 2021 17:40:15 -0400 Subject: [PATCH 0458/3804] sched: prctl() core-scheduling interface This patch provides support for setting and copying core scheduling 'task cookies' between threads (PID), processes (TGID), and process groups (PGID). The value of core scheduling isn't that tasks don't share a core, 'nosmt' can do that. The value lies in exploiting all the sharing opportunities that exist to recover possible lost performance and that requires a degree of flexibility in the API. From a security perspective (and there are others), the thread, process and process group distinction is an existent hierarchal categorization of tasks that reflects many of the security concerns about 'data sharing'. For example, protecting against cache-snooping by a thread that can just read the memory directly isn't all that useful. With this in mind, subcommands to CREATE/SHARE (TO/FROM) provide a mechanism to create and share cookies. CREATE/SHARE_TO specify a target pid with enum pidtype used to specify the scope of the targeted tasks. For example, PIDTYPE_TGID will share the cookie with the process and all of it's threads as typically desired in a security scenario. API: prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, tgtpid, pidtype, &cookie) prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, tgtpid, pidtype, NULL) prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, tgtpid, pidtype, NULL) prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, srcpid, pidtype, NULL) where 'tgtpid/srcpid == 0' implies the current process and pidtype is kernel enum pid_type {PIDTYPE_PID, PIDTYPE_TGID, PIDTYPE_PGID, ...}. For return values, EINVAL, ENOMEM are what they say. ESRCH means the tgtpid/srcpid was not found. EPERM indicates lack of PTRACE permission access to tgtpid/srcpid. ENODEV indicates your machines lacks SMT. [peterz: complete rewrite] Signed-off-by: Chris Hyser Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123309.039845339@infradead.org --- include/linux/sched.h | 2 + include/uapi/linux/prctl.h | 8 +++ kernel/sched/core_sched.c | 114 +++++++++++++++++++++++++++++++ kernel/sys.c | 5 ++ tools/include/uapi/linux/prctl.h | 8 +++ 5 files changed, 137 insertions(+) diff --git a/include/linux/sched.h b/include/linux/sched.h index fba47e52e482b..c7e7d50e2fdca 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2182,6 +2182,8 @@ const struct cpumask *sched_trace_rd_span(struct root_domain *rd); #ifdef CONFIG_SCHED_CORE extern void sched_core_free(struct task_struct *tsk); extern void sched_core_fork(struct task_struct *p); +extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type, + unsigned long uaddr); #else static inline void sched_core_free(struct task_struct *tsk) { } static inline void sched_core_fork(struct task_struct *p) { } diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 18a9f59dc067f..967d9c55323d1 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -259,4 +259,12 @@ struct prctl_mm_map { #define PR_PAC_SET_ENABLED_KEYS 60 #define PR_PAC_GET_ENABLED_KEYS 61 +/* Request the scheduler to share a core */ +#define PR_SCHED_CORE 62 +# define PR_SCHED_CORE_GET 0 +# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ +# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ +# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ +# define PR_SCHED_CORE_MAX 4 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c index dcbbeaefaaa3a..9a80e9a474c07 100644 --- a/kernel/sched/core_sched.c +++ b/kernel/sched/core_sched.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include "sched.h" /* @@ -113,3 +114,116 @@ void sched_core_free(struct task_struct *p) { sched_core_put_cookie(p->core_cookie); } + +static void __sched_core_set(struct task_struct *p, unsigned long cookie) +{ + cookie = sched_core_get_cookie(cookie); + cookie = sched_core_update_cookie(p, cookie); + sched_core_put_cookie(cookie); +} + +/* Called from prctl interface: PR_SCHED_CORE */ +int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type, + unsigned long uaddr) +{ + unsigned long cookie = 0, id = 0; + struct task_struct *task, *p; + struct pid *grp; + int err = 0; + + if (!static_branch_likely(&sched_smt_present)) + return -ENODEV; + + if (type > PIDTYPE_PGID || cmd >= PR_SCHED_CORE_MAX || pid < 0 || + (cmd != PR_SCHED_CORE_GET && uaddr)) + return -EINVAL; + + rcu_read_lock(); + if (pid == 0) { + task = current; + } else { + task = find_task_by_vpid(pid); + if (!task) { + rcu_read_unlock(); + return -ESRCH; + } + } + get_task_struct(task); + rcu_read_unlock(); + + /* + * Check if this process has the right to modify the specified + * process. Use the regular "ptrace_may_access()" checks. + */ + if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) { + err = -EPERM; + goto out; + } + + switch (cmd) { + case PR_SCHED_CORE_GET: + if (type != PIDTYPE_PID || uaddr & 7) { + err = -EINVAL; + goto out; + } + cookie = sched_core_clone_cookie(task); + if (cookie) { + /* XXX improve ? */ + ptr_to_hashval((void *)cookie, &id); + } + err = put_user(id, (u64 __user *)uaddr); + goto out; + + case PR_SCHED_CORE_CREATE: + cookie = sched_core_alloc_cookie(); + if (!cookie) { + err = -ENOMEM; + goto out; + } + break; + + case PR_SCHED_CORE_SHARE_TO: + cookie = sched_core_clone_cookie(current); + break; + + case PR_SCHED_CORE_SHARE_FROM: + if (type != PIDTYPE_PID) { + err = -EINVAL; + goto out; + } + cookie = sched_core_clone_cookie(task); + __sched_core_set(current, cookie); + goto out; + + default: + err = -EINVAL; + goto out; + }; + + if (type == PIDTYPE_PID) { + __sched_core_set(task, cookie); + goto out; + } + + read_lock(&tasklist_lock); + grp = task_pid_type(task, type); + + do_each_pid_thread(grp, type, p) { + if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) { + err = -EPERM; + goto out_tasklist; + } + } while_each_pid_thread(grp, type, p); + + do_each_pid_thread(grp, type, p) { + __sched_core_set(p, cookie); + } while_each_pid_thread(grp, type, p); +out_tasklist: + read_unlock(&tasklist_lock); + +out: + sched_core_put_cookie(cookie); + put_task_struct(task); + return err; +} + diff --git a/kernel/sys.c b/kernel/sys.c index 3a583a29815fa..9de46a4bf4921 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2550,6 +2550,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, error = set_syscall_user_dispatch(arg2, arg3, arg4, (char __user *) arg5); break; +#ifdef CONFIG_SCHED_CORE + case PR_SCHED_CORE: + error = sched_core_share_pid(arg2, arg3, arg4, arg5); + break; +#endif default: error = -EINVAL; break; diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 18a9f59dc067f..967d9c55323d1 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -259,4 +259,12 @@ struct prctl_mm_map { #define PR_PAC_SET_ENABLED_KEYS 60 #define PR_PAC_GET_ENABLED_KEYS 61 +/* Request the scheduler to share a core */ +#define PR_SCHED_CORE 62 +# define PR_SCHED_CORE_GET 0 +# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ +# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ +# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ +# define PR_SCHED_CORE_MAX 4 + #endif /* _LINUX_PRCTL_H */ -- GitLab From 9f26990074931bbf797373e53104216059b300b1 Mon Sep 17 00:00:00 2001 From: Chris Hyser Date: Wed, 24 Mar 2021 17:40:16 -0400 Subject: [PATCH 0459/3804] kselftest: Add test for core sched prctl interface Provides a selftest and examples of using the interface. [peterz: updated to not use sched_debug] Signed-off-by: Chris Hyser Signed-off-by: Peter Zijlstra (Intel) Tested-by: Don Hiatt Tested-by: Hongyu Ning Tested-by: Vincent Guittot Link: https://lkml.kernel.org/r/20210422123309.100860030@infradead.org --- tools/testing/selftests/sched/.gitignore | 1 + tools/testing/selftests/sched/Makefile | 14 + tools/testing/selftests/sched/config | 1 + tools/testing/selftests/sched/cs_prctl_test.c | 338 ++++++++++++++++++ 4 files changed, 354 insertions(+) create mode 100644 tools/testing/selftests/sched/.gitignore create mode 100644 tools/testing/selftests/sched/Makefile create mode 100644 tools/testing/selftests/sched/config create mode 100644 tools/testing/selftests/sched/cs_prctl_test.c diff --git a/tools/testing/selftests/sched/.gitignore b/tools/testing/selftests/sched/.gitignore new file mode 100644 index 0000000000000..6996d4654d924 --- /dev/null +++ b/tools/testing/selftests/sched/.gitignore @@ -0,0 +1 @@ +cs_prctl_test diff --git a/tools/testing/selftests/sched/Makefile b/tools/testing/selftests/sched/Makefile new file mode 100644 index 0000000000000..10c72f14fea9d --- /dev/null +++ b/tools/testing/selftests/sched/Makefile @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0+ + +ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),) +CLANG_FLAGS += -no-integrated-as +endif + +CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -Wl,-rpath=./ \ + $(CLANG_FLAGS) +LDLIBS += -lpthread + +TEST_GEN_FILES := cs_prctl_test +TEST_PROGS := cs_prctl_test + +include ../lib.mk diff --git a/tools/testing/selftests/sched/config b/tools/testing/selftests/sched/config new file mode 100644 index 0000000000000..e8b09aa7c0c4c --- /dev/null +++ b/tools/testing/selftests/sched/config @@ -0,0 +1 @@ +CONFIG_SCHED_DEBUG=y diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c new file mode 100644 index 0000000000000..63fe6521c56d9 --- /dev/null +++ b/tools/testing/selftests/sched/cs_prctl_test.c @@ -0,0 +1,338 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Use the core scheduling prctl() to test core scheduling cookies control. + * + * Copyright (c) 2021 Oracle and/or its affiliates. + * Author: Chris Hyser + * + * + * This library is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License as + * published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License + * for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this library; if not, see . + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if __GLIBC_PREREQ(2, 30) == 0 +#include +static pid_t gettid(void) +{ + return syscall(SYS_gettid); +} +#endif + +#ifndef PR_SCHED_CORE +#define PR_SCHED_CORE 62 +# define PR_SCHED_CORE_GET 0 +# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */ +# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */ +# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */ +# define PR_SCHED_CORE_MAX 4 +#endif + +#define MAX_PROCESSES 128 +#define MAX_THREADS 128 + +static const char USAGE[] = "cs_prctl_test [options]\n" +" options:\n" +" -P : number of processes to create.\n" +" -T : number of threads per process to create.\n" +" -d : delay time to keep tasks alive.\n" +" -k : keep tasks alive until keypress.\n"; + +enum pid_type {PIDTYPE_PID = 0, PIDTYPE_TGID, PIDTYPE_PGID}; + +const int THREAD_CLONE_FLAGS = CLONE_THREAD | CLONE_SIGHAND | CLONE_FS | CLONE_VM | CLONE_FILES; + +static int _prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, + unsigned long arg5) +{ + int res; + + res = prctl(option, arg2, arg3, arg4, arg5); + printf("%d = prctl(%d, %ld, %ld, %ld, %lx)\n", res, option, (long)arg2, (long)arg3, + (long)arg4, arg5); + return res; +} + +#define STACK_SIZE (1024 * 1024) + +#define handle_error(msg) __handle_error(__FILE__, __LINE__, msg) +static void __handle_error(char *fn, int ln, char *msg) +{ + printf("(%s:%d) - ", fn, ln); + perror(msg); + exit(EXIT_FAILURE); +} + +static void handle_usage(int rc, char *msg) +{ + puts(USAGE); + puts(msg); + putchar('\n'); + exit(rc); +} + +static unsigned long get_cs_cookie(int pid) +{ + unsigned long long cookie; + int ret; + + ret = prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, pid, PIDTYPE_PID, + (unsigned long)&cookie); + if (ret) { + printf("Not a core sched system\n"); + return -1UL; + } + + return cookie; +} + +struct child_args { + int num_threads; + int pfd[2]; + int cpid; + int thr_tids[MAX_THREADS]; +}; + +static int child_func_thread(void __attribute__((unused))*arg) +{ + while (1) + usleep(20000); + return 0; +} + +static void create_threads(int num_threads, int thr_tids[]) +{ + void *child_stack; + pid_t tid; + int i; + + for (i = 0; i < num_threads; ++i) { + child_stack = malloc(STACK_SIZE); + if (!child_stack) + handle_error("child stack allocate"); + + tid = clone(child_func_thread, child_stack + STACK_SIZE, THREAD_CLONE_FLAGS, NULL); + if (tid == -1) + handle_error("clone thread"); + thr_tids[i] = tid; + } +} + +static int child_func_process(void *arg) +{ + struct child_args *ca = (struct child_args *)arg; + + close(ca->pfd[0]); + + create_threads(ca->num_threads, ca->thr_tids); + + write(ca->pfd[1], &ca->thr_tids, sizeof(int) * ca->num_threads); + close(ca->pfd[1]); + + while (1) + usleep(20000); + return 0; +} + +static unsigned char child_func_process_stack[STACK_SIZE]; + +void create_processes(int num_processes, int num_threads, struct child_args proc[]) +{ + pid_t cpid; + int i; + + for (i = 0; i < num_processes; ++i) { + proc[i].num_threads = num_threads; + + if (pipe(proc[i].pfd) == -1) + handle_error("pipe() failed"); + + cpid = clone(child_func_process, child_func_process_stack + STACK_SIZE, + SIGCHLD, &proc[i]); + proc[i].cpid = cpid; + close(proc[i].pfd[1]); + } + + for (i = 0; i < num_processes; ++i) { + read(proc[i].pfd[0], &proc[i].thr_tids, sizeof(int) * proc[i].num_threads); + close(proc[i].pfd[0]); + } +} + +void disp_processes(int num_processes, struct child_args proc[]) +{ + int i, j; + + printf("tid=%d, / tgid=%d / pgid=%d: %lx\n", gettid(), getpid(), getpgid(0), + get_cs_cookie(getpid())); + + for (i = 0; i < num_processes; ++i) { + printf(" tid=%d, / tgid=%d / pgid=%d: %lx\n", proc[i].cpid, proc[i].cpid, + getpgid(proc[i].cpid), get_cs_cookie(proc[i].cpid)); + for (j = 0; j < proc[i].num_threads; ++j) { + printf(" tid=%d, / tgid=%d / pgid=%d: %lx\n", proc[i].thr_tids[j], + proc[i].cpid, getpgid(0), get_cs_cookie(proc[i].thr_tids[j])); + } + } + puts("\n"); +} + +static int errors; + +#define validate(v) _validate(__LINE__, v, #v) +void _validate(int line, int val, char *msg) +{ + if (!val) { + ++errors; + printf("(%d) FAILED: %s\n", line, msg); + } else { + printf("(%d) PASSED: %s\n", line, msg); + } +} + +int main(int argc, char *argv[]) +{ + struct child_args procs[MAX_PROCESSES]; + + int keypress = 0; + int num_processes = 2; + int num_threads = 3; + int delay = 0; + int res = 0; + int pidx; + int pid; + int opt; + + while ((opt = getopt(argc, argv, ":hkT:P:d:")) != -1) { + switch (opt) { + case 'P': + num_processes = (int)strtol(optarg, NULL, 10); + break; + case 'T': + num_threads = (int)strtoul(optarg, NULL, 10); + break; + case 'd': + delay = (int)strtol(optarg, NULL, 10); + break; + case 'k': + keypress = 1; + break; + case 'h': + printf(USAGE); + exit(EXIT_SUCCESS); + default: + handle_usage(20, "unknown option"); + } + } + + if (num_processes < 1 || num_processes > MAX_PROCESSES) + handle_usage(1, "Bad processes value"); + + if (num_threads < 1 || num_threads > MAX_THREADS) + handle_usage(2, "Bad thread value"); + + if (keypress) + delay = -1; + + srand(time(NULL)); + + /* put into separate process group */ + if (setpgid(0, 0) != 0) + handle_error("process group"); + + printf("\n## Create a thread/process/process group hiearchy\n"); + create_processes(num_processes, num_threads, procs); + disp_processes(num_processes, procs); + validate(get_cs_cookie(0) == 0); + + printf("\n## Set a cookie on entire process group\n"); + if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, 0, PIDTYPE_PGID, 0) < 0) + handle_error("core_sched create failed -- PGID"); + disp_processes(num_processes, procs); + + validate(get_cs_cookie(0) != 0); + + /* get a random process pid */ + pidx = rand() % num_processes; + pid = procs[pidx].cpid; + + validate(get_cs_cookie(0) == get_cs_cookie(pid)); + validate(get_cs_cookie(0) == get_cs_cookie(procs[pidx].thr_tids[0])); + + printf("\n## Set a new cookie on entire process/TGID [%d]\n", pid); + if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid, PIDTYPE_TGID, 0) < 0) + handle_error("core_sched create failed -- TGID"); + disp_processes(num_processes, procs); + + validate(get_cs_cookie(0) != get_cs_cookie(pid)); + validate(get_cs_cookie(pid) != 0); + validate(get_cs_cookie(pid) == get_cs_cookie(procs[pidx].thr_tids[0])); + + printf("\n## Copy the cookie of current/PGID[%d], to pid [%d] as PIDTYPE_PID\n", + getpid(), pid); + if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, pid, PIDTYPE_PID, 0) < 0) + handle_error("core_sched share to itself failed -- PID"); + disp_processes(num_processes, procs); + + validate(get_cs_cookie(0) == get_cs_cookie(pid)); + validate(get_cs_cookie(pid) != 0); + validate(get_cs_cookie(pid) != get_cs_cookie(procs[pidx].thr_tids[0])); + + printf("\n## Copy cookie from a thread [%d] to current/PGID [%d] as PIDTYPE_PID\n", + procs[pidx].thr_tids[0], getpid()); + if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, procs[pidx].thr_tids[0], + PIDTYPE_PID, 0) < 0) + handle_error("core_sched share from thread failed -- PID"); + disp_processes(num_processes, procs); + + validate(get_cs_cookie(0) == get_cs_cookie(procs[pidx].thr_tids[0])); + validate(get_cs_cookie(pid) != get_cs_cookie(procs[pidx].thr_tids[0])); + + printf("\n## Copy cookie from current [%d] to current as pidtype PGID\n", getpid()); + if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, 0, PIDTYPE_PGID, 0) < 0) + handle_error("core_sched share to self failed -- PGID"); + disp_processes(num_processes, procs); + + validate(get_cs_cookie(0) == get_cs_cookie(pid)); + validate(get_cs_cookie(pid) != 0); + validate(get_cs_cookie(pid) == get_cs_cookie(procs[pidx].thr_tids[0])); + + if (errors) { + printf("TESTS FAILED. errors: %d\n", errors); + res = 10; + } else { + printf("SUCCESS !!!\n"); + } + + if (keypress) + getchar(); + else + sleep(delay); + + for (pidx = 0; pidx < num_processes; ++pidx) + kill(procs[pidx].cpid, 15); + + return res; +} -- GitLab From 64e1f5872a8c3d80bce4686b4ab5dbc6e6bd30c5 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Thu, 6 May 2021 22:07:26 +0300 Subject: [PATCH 0460/3804] x86/alternatives: Make the x86nops[] symbol static Sparse says: arch/x86/kernel/alternative.c:78:21: warning: symbol 'x86nops' was not declared. Should it be static? Since x86nops[] is not used outside this file, Sparse is right and it can be made static. Signed-off-by: Pavel Skripkin Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210506190726.15575-1-paskripkin@gmail.com --- arch/x86/kernel/alternative.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 6974b51744955..75c752b0628c1 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -75,7 +75,7 @@ do { \ } \ } while (0) -const unsigned char x86nops[] = +static const unsigned char x86nops[] = { BYTES_NOP1, BYTES_NOP2, -- GitLab From 1bc67873d401e6c2e6e30be7fef21337db07a042 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 12 May 2021 11:33:10 +0200 Subject: [PATCH 0461/3804] x86/asm: Simplify __smp_mb() definition Drop the bitness ifdeffery in favor of using _ASM_SP, which is the helper macro for the rSP register specification for 32 and 64 bit depending on the build. No functional changes. Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20210512093310.5635-1-bp@alien8.de --- arch/x86/include/asm/barrier.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index 4819d5e5a3353..3ba772a69cc8b 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -54,11 +54,8 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #define dma_rmb() barrier() #define dma_wmb() barrier() -#ifdef CONFIG_X86_32 -#define __smp_mb() asm volatile("lock; addl $0,-4(%%esp)" ::: "memory", "cc") -#else -#define __smp_mb() asm volatile("lock; addl $0,-4(%%rsp)" ::: "memory", "cc") -#endif +#define __smp_mb() asm volatile("lock; addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc") + #define __smp_rmb() dma_rmb() #define __smp_wmb() barrier() #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) -- GitLab From c6c82e0cd8125d30f2f1b29205c7e1a2f1a6785b Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Tue, 11 May 2021 21:56:29 +0200 Subject: [PATCH 0462/3804] vfio-ccw: Check initialized flag in cp_init() We have a really nice flag in the channel_program struct that indicates if it had been initialized by cp_init(), and use it as a guard in the other cp accessor routines, but not for a duplicate call into cp_init(). The possibility of this occurring is low, because that flow is protected by the private->io_mutex and FSM CP_PROCESSING state. But then why bother checking it in (for example) cp_prefetch() then? Let's just be consistent and check for that in cp_init() too. Fixes: 71189f263f8a3 ("vfio-ccw: make it safe to access channel programs") Signed-off-by: Eric Farman Reviewed-by: Cornelia Huck Acked-by: Matthew Rosato Message-Id: <20210511195631.3995081-2-farman@linux.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_cp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index b9febc581b1f4..8d1b2771c1aa0 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -638,6 +638,10 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb) static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 1); int ret; + /* this is an error in the caller */ + if (cp->initialized) + return -EBUSY; + /* * We only support prefetching the channel program. We assume all channel * programs executed by supported guests likewise support prefetching. -- GitLab From 6c02ac4c9211edabe17bda437ac97e578756f31b Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Tue, 11 May 2021 21:56:30 +0200 Subject: [PATCH 0463/3804] vfio-ccw: Reset FSM state to IDLE inside FSM When an I/O request is made, the fsm_io_request() routine moves the FSM state from IDLE to CP_PROCESSING, and then fsm_io_helper() moves it to CP_PENDING if the START SUBCHANNEL received a cc0. Yet, the error case to go from CP_PROCESSING back to IDLE is done after the FSM call returns. Let's move this up into the FSM proper, to provide some better symmetry when unwinding in this case. Signed-off-by: Eric Farman Reviewed-by: Cornelia Huck Acked-by: Matthew Rosato Message-Id: <20210511195631.3995081-3-farman@linux.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_fsm.c | 1 + drivers/s390/cio/vfio_ccw_ops.c | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index 23e61aa638e4e..e435a9cd92dac 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ -318,6 +318,7 @@ static void fsm_io_request(struct vfio_ccw_private *private, } err_out: + private->state = VFIO_CCW_STATE_IDLE; trace_vfio_ccw_fsm_io_request(scsw->cmd.fctl, schid, io_region->ret_code, errstr); } diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 491a64c61fff1..c57d2a7f09197 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -279,8 +279,6 @@ static ssize_t vfio_ccw_mdev_write_io_region(struct vfio_ccw_private *private, } vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_IO_REQ); - if (region->ret_code != 0) - private->state = VFIO_CCW_STATE_IDLE; ret = (region->ret_code != 0) ? region->ret_code : count; out_unlock: -- GitLab From 2af7a834a435460d546f0cf0a8b8e4d259f1d910 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Tue, 11 May 2021 21:56:31 +0200 Subject: [PATCH 0464/3804] vfio-ccw: Serialize FSM IDLE state with I/O completion Today, the stacked call to vfio_ccw_sch_io_todo() does three things: 1) Update a solicited IRB with CP information, and release the CP if the interrupt was the end of a START operation. 2) Copy the IRB data into the io_region, under the protection of the io_mutex 3) Reset the vfio-ccw FSM state to IDLE to acknowledge that vfio-ccw can accept more work. The trouble is that step 3 is (A) invoked for both solicited and unsolicited interrupts, and (B) sitting after the mutex for step 2. This second piece becomes a problem if it processes an interrupt for a CLEAR SUBCHANNEL while another thread initiates a START, thus allowing the CP and FSM states to get out of sync. That is: CPU 1 CPU 2 fsm_do_clear() fsm_irq() fsm_io_request() vfio_ccw_sch_io_todo() fsm_io_helper() Since the FSM state and CP should be kept in sync, let's make a note when the CP is released, and rely on that as an indication that the FSM should also be reset at the end of this routine and open up the device for more work. Signed-off-by: Eric Farman Acked-by: Matthew Rosato Reviewed-by: Cornelia Huck Message-Id: <20210511195631.3995081-4-farman@linux.ibm.com> Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_drv.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 8c625b530035f..9b61e9b131ade 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -86,6 +86,7 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work) struct vfio_ccw_private *private; struct irb *irb; bool is_final; + bool cp_is_finished = false; private = container_of(work, struct vfio_ccw_private, io_work); irb = &private->irb; @@ -94,14 +95,21 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work) (SCSW_ACTL_DEVACT | SCSW_ACTL_SCHACT)); if (scsw_is_solicited(&irb->scsw)) { cp_update_scsw(&private->cp, &irb->scsw); - if (is_final && private->state == VFIO_CCW_STATE_CP_PENDING) + if (is_final && private->state == VFIO_CCW_STATE_CP_PENDING) { cp_free(&private->cp); + cp_is_finished = true; + } } mutex_lock(&private->io_mutex); memcpy(private->io_region->irb_area, irb, sizeof(*irb)); mutex_unlock(&private->io_mutex); - if (private->mdev && is_final) + /* + * Reset to IDLE only if processing of a channel program + * has finished. Do not overwrite a possible processing + * state if the final interrupt was for HSCH or CSCH. + */ + if (private->mdev && cp_is_finished) private->state = VFIO_CCW_STATE_IDLE; if (private->io_trigger) -- GitLab From f1a0a376ca0c4ef1fc3d24e3e502acbb5b795674 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Wed, 12 May 2021 10:46:36 +0100 Subject: [PATCH 0465/3804] sched/core: Initialize the idle task with preemption disabled As pointed out by commit de9b8f5dcbd9 ("sched: Fix crash trying to dequeue/enqueue the idle thread") init_idle() can and will be invoked more than once on the same idle task. At boot time, it is invoked for the boot CPU thread by sched_init(). Then smp_init() creates the threads for all the secondary CPUs and invokes init_idle() on them. As the hotplug machinery brings the secondaries to life, it will issue calls to idle_thread_get(), which itself invokes init_idle() yet again. In this case it's invoked twice more per secondary: at _cpu_up(), and at bringup_cpu(). Given smp_init() already initializes the idle tasks for all *possible* CPUs, no further initialization should be required. Now, removing init_idle() from idle_thread_get() exposes some interesting expectations with regards to the idle task's preempt_count: the secondary startup always issues a preempt_disable(), requiring some reset of the preempt count to 0 between hot-unplug and hotplug, which is currently served by idle_thread_get() -> idle_init(). Given the idle task is supposed to have preemption disabled once and never see it re-enabled, it seems that what we actually want is to initialize its preempt_count to PREEMPT_DISABLED and leave it there. Do that, and remove init_idle() from idle_thread_get(). Secondary startups were patched via coccinelle: @begone@ @@ -preempt_disable(); ... cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); Signed-off-by: Valentin Schneider Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra Link: https://lore.kernel.org/r/20210512094636.2958515-1-valentin.schneider@arm.com --- arch/alpha/kernel/smp.c | 1 - arch/arc/kernel/smp.c | 1 - arch/arm/kernel/smp.c | 1 - arch/arm64/include/asm/preempt.h | 2 +- arch/arm64/kernel/smp.c | 1 - arch/csky/kernel/smp.c | 1 - arch/ia64/kernel/smpboot.c | 1 - arch/mips/kernel/smp.c | 1 - arch/openrisc/kernel/smp.c | 2 -- arch/parisc/kernel/smp.c | 1 - arch/powerpc/kernel/smp.c | 1 - arch/riscv/kernel/smpboot.c | 1 - arch/s390/include/asm/preempt.h | 4 ++-- arch/s390/kernel/smp.c | 1 - arch/sh/kernel/smp.c | 2 -- arch/sparc/kernel/smp_32.c | 1 - arch/sparc/kernel/smp_64.c | 3 --- arch/x86/include/asm/preempt.h | 2 +- arch/x86/kernel/smpboot.c | 1 - arch/xtensa/kernel/smp.c | 1 - include/asm-generic/preempt.h | 2 +- init/main.c | 6 +----- kernel/fork.c | 2 +- kernel/sched/core.c | 2 +- kernel/smpboot.c | 1 - 25 files changed, 8 insertions(+), 34 deletions(-) diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index f4dd9f3f30010..4b2575f936d46 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -166,7 +166,6 @@ smp_callin(void) DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n", cpuid, current, current->active_mm)); - preempt_disable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 52906d3145371..db0e104d68355 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -189,7 +189,6 @@ void start_kernel_secondary(void) pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu); local_irq_enable(); - preempt_disable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 74679240a9d8e..c7bb168b0d97c 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -432,7 +432,6 @@ asmlinkage void secondary_start_kernel(void) #endif pr_debug("CPU%u: Booted secondary processor\n", cpu); - preempt_disable(); trace_hardirqs_off(); /* diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h index 80e946b2abee2..e83f0982b99c1 100644 --- a/arch/arm64/include/asm/preempt.h +++ b/arch/arm64/include/asm/preempt.h @@ -23,7 +23,7 @@ static inline void preempt_count_set(u64 pc) } while (0) #define init_idle_preempt_count(p, cpu) do { \ - task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \ + task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \ } while (0) static inline void set_preempt_need_resched(void) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index dcd7041b2b077..6671000a8b7d7 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -224,7 +224,6 @@ asmlinkage notrace void secondary_start_kernel(void) init_gic_priority_masking(); rcu_cpu_starting(cpu); - preempt_disable(); trace_hardirqs_off(); /* diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c index 0f9f5eef93386..e2993539af8ef 100644 --- a/arch/csky/kernel/smp.c +++ b/arch/csky/kernel/smp.c @@ -281,7 +281,6 @@ void csky_start_secondary(void) pr_info("CPU%u Online: %s...\n", cpu, __func__); local_irq_enable(); - preempt_disable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 49b4885809399..d10f780c13b9e 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -441,7 +441,6 @@ start_secondary (void *unused) #endif efi_map_pal_code(); cpu_init(); - preempt_disable(); smp_callin(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index ef86fbad85460..d542fb7af3ba2 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -348,7 +348,6 @@ asmlinkage void start_secondary(void) */ calibrate_delay(); - preempt_disable(); cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c index 48e1092a64de3..415e209732a3d 100644 --- a/arch/openrisc/kernel/smp.c +++ b/arch/openrisc/kernel/smp.c @@ -145,8 +145,6 @@ asmlinkage __init void secondary_start_kernel(void) set_cpu_online(cpu, true); local_irq_enable(); - - preempt_disable(); /* * OK, it's off to the idle thread for us */ diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 10227f667c8a6..1405b603b91b6 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -302,7 +302,6 @@ void __init smp_callin(unsigned long pdce_proc) #endif smp_cpu_init(slave_id); - preempt_disable(); flush_cache_all_local(); /* start with known state */ flush_tlb_all_local(NULL); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 2e05c783440a3..6c6e4d934d867 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1547,7 +1547,6 @@ void start_secondary(void *unused) smp_store_cpu_info(cpu); set_dec(tb_ticks_per_jiffy); rcu_cpu_starting(cpu); - preempt_disable(); cpu_callin_map[cpu] = 1; if (smp_ops->setup_cpu) diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 9a408e2942acf..bd82375db51a6 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -180,7 +180,6 @@ asmlinkage __visible void smp_callin(void) * Disable preemption before enabling interrupts, so we don't try to * schedule a CPU that hasn't actually started yet. */ - preempt_disable(); local_irq_enable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index b49e0492842cc..23ff51be7e29c 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -32,7 +32,7 @@ static inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - S390_lowcore.preempt_count = PREEMPT_ENABLED; \ + S390_lowcore.preempt_count = PREEMPT_DISABLED; \ } while (0) static inline void set_preempt_need_resched(void) @@ -91,7 +91,7 @@ static inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - S390_lowcore.preempt_count = PREEMPT_ENABLED; \ + S390_lowcore.preempt_count = PREEMPT_DISABLED; \ } while (0) static inline void set_preempt_need_resched(void) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 2fec2b80d35d2..111909aeb8d21 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -878,7 +878,6 @@ static void smp_init_secondary(void) restore_access_regs(S390_lowcore.access_regs_save_area); cpu_init(); rcu_cpu_starting(cpu); - preempt_disable(); init_cpu_timer(); vtime_init(); vdso_getcpu_init(); diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 372acdc9033eb..65924d9ec2459 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -186,8 +186,6 @@ asmlinkage void start_secondary(void) per_cpu_trap_init(); - preempt_disable(); - notify_cpu_starting(cpu); local_irq_enable(); diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 50c127ab46d5b..22b148e5a5f88 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -348,7 +348,6 @@ static void sparc_start_secondary(void *arg) */ arch_cpu_pre_starting(arg); - preempt_disable(); cpu = smp_processor_id(); notify_cpu_starting(cpu); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index e38d8bf454e86..ae5faa1d989d2 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -138,9 +138,6 @@ void smp_callin(void) set_cpu_online(cpuid, true); - /* idle thread is expected to have preempt disabled */ - preempt_disable(); - local_irq_enable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index f8cb8af4de5ce..fe5efbcba8240 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -44,7 +44,7 @@ static __always_inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \ + per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \ } while (0) /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0ad5214f598a9..0936f5ba32229 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -236,7 +236,6 @@ static void notrace start_secondary(void *unused) cpu_init(); rcu_cpu_starting(raw_smp_processor_id()); x86_cpuinit.early_percpu_clock_init(); - preempt_disable(); smp_callin(); enable_start_cpu0 = 0; diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index cd85a7a2722ba..1254da07ead1f 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -145,7 +145,6 @@ void secondary_start_kernel(void) cpumask_set_cpu(cpu, mm_cpumask(mm)); enter_lazy_tlb(mm, current); - preempt_disable(); trace_hardirqs_off(); calibrate_delay(); diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index d683f5e6d7913..b4d43a4af5f79 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -29,7 +29,7 @@ static __always_inline void preempt_count_set(int pc) } while (0) #define init_idle_preempt_count(p, cpu) do { \ - task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \ + task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \ } while (0) static __always_inline void set_preempt_need_resched(void) diff --git a/init/main.c b/init/main.c index eb01e121d2f15..7b027d9c5c89b 100644 --- a/init/main.c +++ b/init/main.c @@ -941,11 +941,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) * time - but meanwhile we still have a functioning scheduler. */ sched_init(); - /* - * Disable preemption - early bootup scheduling is extremely - * fragile until we cpu_idle() for the first time. - */ - preempt_disable(); + if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); diff --git a/kernel/fork.c b/kernel/fork.c index e7fd928fcafe9..ace4631b5b547 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2412,7 +2412,7 @@ static inline void init_idle_pids(struct task_struct *idle) } } -struct task_struct *fork_idle(int cpu) +struct task_struct * __init fork_idle(int cpu) { struct task_struct *task; struct kernel_clone_args args = { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 55b2d9399e12a..9d00f4958bde7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8227,7 +8227,7 @@ void show_state_filter(unsigned long state_filter) * NOTE: this function does not set the idle thread's NEED_RESCHED * flag, to make booting more robust. */ -void init_idle(struct task_struct *idle, int cpu) +void __init init_idle(struct task_struct *idle, int cpu) { struct rq *rq = cpu_rq(cpu); unsigned long flags; diff --git a/kernel/smpboot.c b/kernel/smpboot.c index f25208e8df836..e4163042c4d66 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -33,7 +33,6 @@ struct task_struct *idle_thread_get(unsigned int cpu) if (!tsk) return ERR_PTR(-ENOMEM); - init_idle(tsk, cpu); return tsk; } -- GitLab From 1e948b1752b58c9c570989ab29ceef5b38fdccda Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Wed, 12 May 2021 11:17:47 +0800 Subject: [PATCH 0466/3804] gpio: cadence: Add missing MODULE_DEVICE_TABLE This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this driver when it is built as an external module. Reported-by: Hulk Robot Signed-off-by: Zou Wei Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-cadence.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/gpio-cadence.c b/drivers/gpio/gpio-cadence.c index a4d3239d25944..4ab3fcd9b9ba6 100644 --- a/drivers/gpio/gpio-cadence.c +++ b/drivers/gpio/gpio-cadence.c @@ -278,6 +278,7 @@ static const struct of_device_id cdns_of_ids[] = { { .compatible = "cdns,gpio-r1p02" }, { /* sentinel */ }, }; +MODULE_DEVICE_TABLE(of, cdns_of_ids); static struct platform_driver cdns_gpio_driver = { .driver = { -- GitLab From a0579474effff6a139768b300d8439c2327b3848 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 10 May 2021 22:46:30 +0300 Subject: [PATCH 0467/3804] gpio: xilinx: Correct kernel doc for xgpio_probe() Kernel doc validator complains: .../gpio-xilinx.c:556: warning: expecting prototype for xgpio_of_probe(). Prototype was for xgpio_probe() instead Correct as suggested by changing the name of the function in the doc.. Fixes: 749564ffd52d ("gpio/xilinx: Convert the driver to platform device interface") Signed-off-by: Andy Shevchenko Tested-by: Neeli Srinivas Reviewed-by: Michal Simek Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-xilinx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpio/gpio-xilinx.c b/drivers/gpio/gpio-xilinx.c index b411d3156e0b6..136557e7dd3ce 100644 --- a/drivers/gpio/gpio-xilinx.c +++ b/drivers/gpio/gpio-xilinx.c @@ -542,7 +542,7 @@ static void xgpio_irqhandler(struct irq_desc *desc) } /** - * xgpio_of_probe - Probe method for the GPIO device. + * xgpio_probe - Probe method for the GPIO device. * @pdev: pointer to the platform device * * Return: -- GitLab From bdbe871ef0caa660e16461a2a94579d9f9ef7ba4 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Fri, 7 May 2021 11:34:11 +0100 Subject: [PATCH 0468/3804] gpio: tegra186: Don't set parent IRQ affinity When hotplugging CPUs on Tegra186 and Tegra194 errors such as the following are seen ... IRQ63: set affinity failed(-22). IRQ65: set affinity failed(-22). IRQ66: set affinity failed(-22). IRQ67: set affinity failed(-22). Looking at the /proc/interrupts the above are all interrupts associated with GPIOs. The reason why these error messages occur is because there is no 'parent_data' associated with any of the GPIO interrupts and so tegra186_irq_set_affinity() simply returns -EINVAL. To understand why there is no 'parent_data' it is first necessary to understand that in addition to the GPIO interrupts being routed to the interrupt controller (GIC), the interrupts for some GPIOs are also routed to the Tegra Power Management Controller (PMC) to wake up the system from low power states. In order to configure GPIO events as wake events in the PMC, the PMC is configured as IRQ parent domain for the GPIO IRQ domain. Originally the GIC was the IRQ parent domain of the PMC and although this was working, this started causing issues once commit 64a267e9a41c ("irqchip/gic: Configure SGIs as standard interrupts") was added, because technically, the GIC is not a parent of the PMC. Commit c351ab7bf2a5 ("soc/tegra: pmc: Don't create fake interrupt hierarchy levels") fixed this by severing the IRQ domain hierarchy for the Tegra GPIOs and hence, there may be no IRQ parent domain for the GPIOs. The GPIO controllers on Tegra186 and Tegra194 have either one or six interrupt lines to the interrupt controller. For GPIO controllers with six interrupts, the mapping of the GPIO interrupt to the controller interrupt is configurable within the GPIO controller. Currently a default mapping is used, however, it could be possible to use the set affinity callback for the Tegra186 GPIO driver to do something a bit more interesting. Currently, because interrupts for all GPIOs are have the same mapping and any attempts to configure the affinity for a given GPIO can conflict with another that shares the same IRQ, for now it is simpler to just remove set affinity support and this avoids the above warnings being seen. Cc: Fixes: c4e1f7d92cd6 ("gpio: tegra186: Set affinity callback to parent") Signed-off-by: Jon Hunter Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-tegra186.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c index 1bd9e44df7184..05974b760796b 100644 --- a/drivers/gpio/gpio-tegra186.c +++ b/drivers/gpio/gpio-tegra186.c @@ -444,16 +444,6 @@ static int tegra186_irq_set_wake(struct irq_data *data, unsigned int on) return 0; } -static int tegra186_irq_set_affinity(struct irq_data *data, - const struct cpumask *dest, - bool force) -{ - if (data->parent_data) - return irq_chip_set_affinity_parent(data, dest, force); - - return -EINVAL; -} - static void tegra186_gpio_irq(struct irq_desc *desc) { struct tegra_gpio *gpio = irq_desc_get_handler_data(desc); @@ -700,7 +690,6 @@ static int tegra186_gpio_probe(struct platform_device *pdev) gpio->intc.irq_unmask = tegra186_irq_unmask; gpio->intc.irq_set_type = tegra186_irq_set_type; gpio->intc.irq_set_wake = tegra186_irq_set_wake; - gpio->intc.irq_set_affinity = tegra186_irq_set_affinity; irq = &gpio->gpio.irq; irq->chip = &gpio->intc; -- GitLab From 47c1131633ef6210add63b8b5704497023a3462a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 12 May 2021 08:09:08 +0900 Subject: [PATCH 0469/3804] ASoC: soc-dai.h: Align the word of comment for SND_SOC_DAIFMT_CBC_CFC Let's use "consumer" instead of "follower". Signed-off-by: Kuninori Morimoto Reviewed-by: Pierre-Louis Bossart Link: https://lore.kernel.org/r/8735usc1gr.wl-kuninori.morimoto.gx@renesas.com Signed-off-by: Mark Brown --- include/sound/soc-dai.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/sound/soc-dai.h b/include/sound/soc-dai.h index 1358a0ceb4d01..0bc29c4516e76 100644 --- a/include/sound/soc-dai.h +++ b/include/sound/soc-dai.h @@ -81,7 +81,7 @@ struct snd_compr_stream; #define SND_SOC_DAIFMT_CBP_CFP (1 << 12) /* codec clk provider & frame provider */ #define SND_SOC_DAIFMT_CBC_CFP (2 << 12) /* codec clk consumer & frame provider */ #define SND_SOC_DAIFMT_CBP_CFC (3 << 12) /* codec clk provider & frame consumer */ -#define SND_SOC_DAIFMT_CBC_CFC (4 << 12) /* codec clk consumer & frame follower */ +#define SND_SOC_DAIFMT_CBC_CFC (4 << 12) /* codec clk consumer & frame consumer */ /* previous definitions kept for backwards-compatibility, do not use in new contributions */ #define SND_SOC_DAIFMT_CBM_CFM SND_SOC_DAIFMT_CBP_CFP -- GitLab From e072b2671606c77538d6a4dd5dda80b508cb4816 Mon Sep 17 00:00:00 2001 From: Zou Wei Date: Wed, 12 May 2021 11:12:25 +0800 Subject: [PATCH 0470/3804] ASoC: sti-sas: add missing MODULE_DEVICE_TABLE This patch adds missing MODULE_DEVICE_TABLE definition which generates correct modalias for automatic loading of this driver when it is built as an external module. Reported-by: Hulk Robot Signed-off-by: Zou Wei Link: https://lore.kernel.org/r/1620789145-14936-1-git-send-email-zou_wei@huawei.com Signed-off-by: Mark Brown --- sound/soc/codecs/sti-sas.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/sti-sas.c b/sound/soc/codecs/sti-sas.c index ffdf7e5595153..82a24e330065f 100644 --- a/sound/soc/codecs/sti-sas.c +++ b/sound/soc/codecs/sti-sas.c @@ -408,6 +408,7 @@ static const struct of_device_id sti_sas_dev_match[] = { }, {}, }; +MODULE_DEVICE_TABLE(of, sti_sas_dev_match); static int sti_sas_driver_probe(struct platform_device *pdev) { -- GitLab From 96f685974609d4c315669ef33d55dbc43996491e Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 11 May 2021 18:57:14 +0100 Subject: [PATCH 0471/3804] ASoC: cs53l30: Add missing regmap use_single config This device requires single register transactions, this will definely cause problems with the new device ID parsing which uses regmap_bulk_read but might also show up in the cache sync sometimes. Add the missing flags to the regmap_config. Fixes: 4fc81bc88ad9 ("ASoC: cs53l30: Minor error paths fixups") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20210511175718.15416-1-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs53l30.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/cs53l30.c b/sound/soc/codecs/cs53l30.c index 3d67cbf9eaaa2..abe0cc0bc03a9 100644 --- a/sound/soc/codecs/cs53l30.c +++ b/sound/soc/codecs/cs53l30.c @@ -912,6 +912,9 @@ static struct regmap_config cs53l30_regmap = { .writeable_reg = cs53l30_writeable_register, .readable_reg = cs53l30_readable_register, .cache_type = REGCACHE_RBTREE, + + .use_single_read = true, + .use_single_write = true, }; static int cs53l30_i2c_probe(struct i2c_client *client, -- GitLab From 27fb585169024440c1b358da35499fa578d803cd Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 11 May 2021 18:57:15 +0100 Subject: [PATCH 0472/3804] ASoC: cs42l73: Add missing regmap use_single config This device requires single register transactions, this will definely cause problems with the new device ID parsing which uses regmap_bulk_read but might also show up in the cache sync sometimes. Add the missing flags to the regmap_config. Fixes: 26495252fe0d ("ASoC: cs42l73: Minor error paths fixups") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20210511175718.15416-2-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l73.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/cs42l73.c b/sound/soc/codecs/cs42l73.c index c3f974ec78e58..e92bacaab53fc 100644 --- a/sound/soc/codecs/cs42l73.c +++ b/sound/soc/codecs/cs42l73.c @@ -1268,6 +1268,9 @@ static const struct regmap_config cs42l73_regmap = { .volatile_reg = cs42l73_volatile_register, .readable_reg = cs42l73_readable_register, .cache_type = REGCACHE_RBTREE, + + .use_single_read = true, + .use_single_write = true, }; static int cs42l73_i2c_probe(struct i2c_client *i2c_client, -- GitLab From 2a682f821941e28fb9ceaa1dd03ccfaea0448101 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 11 May 2021 18:57:16 +0100 Subject: [PATCH 0473/3804] ASoC: cs35l34: Add missing regmap use_single config This device requires single register transactions, this will definely cause problems with the new device ID parsing which uses regmap_bulk_read but might also show up in the cache sync sometimes. Add the missing flags to the regmap_config. Fixes: 8cb9b001635c ("ASoC: cs35l34: Minor error paths fixups") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20210511175718.15416-3-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l34.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/cs35l34.c b/sound/soc/codecs/cs35l34.c index 110ee2d063581..3d3c3c34dfe27 100644 --- a/sound/soc/codecs/cs35l34.c +++ b/sound/soc/codecs/cs35l34.c @@ -800,6 +800,9 @@ static struct regmap_config cs35l34_regmap = { .readable_reg = cs35l34_readable_register, .precious_reg = cs35l34_precious_register, .cache_type = REGCACHE_RBTREE, + + .use_single_read = true, + .use_single_write = true, }; static int cs35l34_handle_of_data(struct i2c_client *i2c_client, -- GitLab From b1078e9869531af4f968ba1b9edad51264943bb8 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 11 May 2021 18:57:17 +0100 Subject: [PATCH 0474/3804] ASoC: cs35l32: Add missing regmap use_single config This device requires single register transactions, this will definely cause problems with the new device ID parsing which uses regmap_bulk_read but might also show up in the cache sync sometimes. Add the missing flags to the regmap_config. Fixes: 283160f1419d ("ASoC: cs35l32: Minor error paths fixups") Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20210511175718.15416-4-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs35l32.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/cs35l32.c b/sound/soc/codecs/cs35l32.c index f4067230ac425..88e79b9f52edc 100644 --- a/sound/soc/codecs/cs35l32.c +++ b/sound/soc/codecs/cs35l32.c @@ -261,6 +261,9 @@ static const struct regmap_config cs35l32_regmap = { .readable_reg = cs35l32_readable_register, .precious_reg = cs35l32_precious_register, .cache_type = REGCACHE_RBTREE, + + .use_single_read = true, + .use_single_write = true, }; static int cs35l32_handle_of_data(struct i2c_client *i2c_client, -- GitLab From 0e49a4de4564b3659a34b0b775d43b6b635b17fa Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 11 May 2021 18:57:18 +0100 Subject: [PATCH 0475/3804] ASoC: cs42l52: Minor tidy up of error paths Fixup a needlessly initialised variable and an unchecked return value. Reported-by: Pierre-Louis Bossart Signed-off-by: Charles Keepax Link: https://lore.kernel.org/r/20210511175718.15416-5-ckeepax@opensource.cirrus.com Signed-off-by: Mark Brown --- sound/soc/codecs/cs42l56.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/cs42l56.c b/sound/soc/codecs/cs42l56.c index c44a5cdb796ec..7cdffdf6b8cf0 100644 --- a/sound/soc/codecs/cs42l56.c +++ b/sound/soc/codecs/cs42l56.c @@ -1175,7 +1175,7 @@ static int cs42l56_i2c_probe(struct i2c_client *i2c_client, struct cs42l56_platform_data *pdata = dev_get_platdata(&i2c_client->dev); int ret, i; - unsigned int devid = 0; + unsigned int devid; unsigned int alpha_rev, metal_rev; unsigned int reg; @@ -1245,6 +1245,11 @@ static int cs42l56_i2c_probe(struct i2c_client *i2c_client, } ret = regmap_read(cs42l56->regmap, CS42L56_CHIP_ID_1, ®); + if (ret) { + dev_err(&i2c_client->dev, "Failed to read chip ID: %d\n", ret); + return ret; + } + devid = reg & CS42L56_CHIP_ID_MASK; if (devid != CS42L56_DEVID) { dev_err(&i2c_client->dev, -- GitLab From 3d681804efcb6e5d8089a433402e19179347d7ae Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 12 May 2021 15:58:24 +0800 Subject: [PATCH 0476/3804] regulator: cros-ec: Fix error code in dev_err message Show proper error code instead of 0. Signed-off-by: Axel Lin Link: https://lore.kernel.org/r/20210512075824.620580-1-axel.lin@ingics.com Signed-off-by: Mark Brown --- drivers/regulator/cros-ec-regulator.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/regulator/cros-ec-regulator.c b/drivers/regulator/cros-ec-regulator.c index eb3fc1db4edc8..c4754f3cf2337 100644 --- a/drivers/regulator/cros-ec-regulator.c +++ b/drivers/regulator/cros-ec-regulator.c @@ -225,8 +225,9 @@ static int cros_ec_regulator_probe(struct platform_device *pdev) drvdata->dev = devm_regulator_register(dev, &drvdata->desc, &cfg); if (IS_ERR(drvdata->dev)) { + ret = PTR_ERR(drvdata->dev); dev_err(&pdev->dev, "Failed to register regulator: %d\n", ret); - return PTR_ERR(drvdata->dev); + return ret; } platform_set_drvdata(pdev, drvdata); -- GitLab From 7907cad7d07e0055789ec0c534452f19dfe1fc80 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Wed, 12 May 2021 17:35:34 +0800 Subject: [PATCH 0477/3804] spi: sprd: Add missing MODULE_DEVICE_TABLE MODULE_DEVICE_TABLE is used to extract the device information out of the driver and builds a table when being compiled. If using this macro, kernel can find the driver if available when the device is plugged in, and then loads that driver and initializes the device. Signed-off-by: Chunyan Zhang Link: https://lore.kernel.org/r/20210512093534.243040-1-zhang.lyra@gmail.com Signed-off-by: Mark Brown --- drivers/spi/spi-sprd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/spi/spi-sprd.c b/drivers/spi/spi-sprd.c index b41a75749b498..28e70db9bbba8 100644 --- a/drivers/spi/spi-sprd.c +++ b/drivers/spi/spi-sprd.c @@ -1068,6 +1068,7 @@ static const struct of_device_id sprd_spi_of_match[] = { { .compatible = "sprd,sc9860-spi", }, { /* sentinel */ } }; +MODULE_DEVICE_TABLE(of, sprd_spi_of_match); static struct platform_driver sprd_spi_driver = { .driver = { -- GitLab From 6b69546912a57ff8c31061f98e56383cc0beffd3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 11 May 2021 17:09:12 +0300 Subject: [PATCH 0478/3804] spi: Assume GPIO CS active high in ACPI case Currently GPIO CS handling, when descriptors are in use, doesn't take into consideration that in ACPI case the default polarity is Active High and can't be altered. Instead we have to use the per-chip definition provided by SPISerialBus() resource. Fixes: 766c6b63aa04 ("spi: fix client driver breakages when using GPIO descriptors") Cc: Liguang Zhang Cc: Jay Fang Cc: Sven Van Asbroeck Signed-off-by: Andy Shevchenko Tested-by: Xin Hao Link: https://lore.kernel.org/r/20210511140912.30757-1-andriy.shevchenko@linux.intel.com Signed-off-by: Mark Brown --- drivers/spi/spi.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index a565e7d6bf3ba..98048af04abf9 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -820,15 +820,29 @@ static void spi_set_cs(struct spi_device *spi, bool enable, bool force) if (spi->cs_gpiod || gpio_is_valid(spi->cs_gpio)) { if (!(spi->mode & SPI_NO_CS)) { - if (spi->cs_gpiod) - /* polarity handled by gpiolib */ - gpiod_set_value_cansleep(spi->cs_gpiod, activate); - else + if (spi->cs_gpiod) { + /* + * Historically ACPI has no means of the GPIO polarity and + * thus the SPISerialBus() resource defines it on the per-chip + * basis. In order to avoid a chain of negations, the GPIO + * polarity is considered being Active High. Even for the cases + * when _DSD() is involved (in the updated versions of ACPI) + * the GPIO CS polarity must be defined Active High to avoid + * ambiguity. That's why we use enable, that takes SPI_CS_HIGH + * into account. + */ + if (has_acpi_companion(&spi->dev)) + gpiod_set_value_cansleep(spi->cs_gpiod, !enable); + else + /* Polarity handled by GPIO library */ + gpiod_set_value_cansleep(spi->cs_gpiod, activate); + } else { /* * invert the enable line, as active low is * default for SPI. */ gpio_set_value_cansleep(spi->cs_gpio, !enable); + } } /* Some SPI masters need both GPIO CS & slave_select */ if ((spi->controller->flags & SPI_MASTER_GPIO_SS) && -- GitLab From 2ca4dcc4909d787ee153272f7efc2bff3b498720 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Tue, 11 May 2021 16:30:15 +0200 Subject: [PATCH 0479/3804] fs/mount_setattr: tighten permission checks We currently don't have any filesystems that support idmapped mounts which are mountable inside a user namespace. That was a deliberate decision for now as a userns root can just mount the filesystem themselves. So enforce this restriction explicitly until there's a real use-case for this. This way we can notice it and will have a chance to adapt and audit our translation helpers and fstests appropriately if we need to support such filesystems. Cc: Christoph Hellwig Cc: Al Viro Cc: stable@vger.kernel.org CC: linux-fsdevel@vger.kernel.org Suggested-by: Seth Forshee Signed-off-by: Christian Brauner --- fs/namespace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index f63337828e1c4..c3f1a78ba3697 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -3855,8 +3855,12 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP)) return -EINVAL; + /* Don't yet support filesystem mountable in user namespaces. */ + if (m->mnt_sb->s_user_ns != &init_user_ns) + return -EINVAL; + /* We're not controlling the superblock. */ - if (!ns_capable(m->mnt_sb->s_user_ns, CAP_SYS_ADMIN)) + if (!capable(CAP_SYS_ADMIN)) return -EPERM; /* Mount has already been visible in the filesystem hierarchy. */ -- GitLab From 25cf0d8aa2a3440ed32bf1f8df1310d6baf3f1e8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 May 2021 21:33:53 +0200 Subject: [PATCH 0480/3804] objtool: Rewrite hashtable sizing Currently objtool has 5 hashtables and sizes them 16 or 20 bits depending on the --vmlinux argument. However, a single side doesn't really work well for the 5 tables, which among them, cover 3 different uses. Also, while vmlinux is larger, there is still a very wide difference between a defconfig and allyesconfig build, which again isn't optimally covered by a single size. Another aspect is the cost of elf_hash_init(), which for large tables dominates the runtime for small input files. It turns out that all it does it assign NULL, something that is required when using malloc(). However, when we allocate memory using mmap(), we're guaranteed to get zero filled pages. Therefore, rewrite the whole thing to: 1) use more dynamic sized tables, depending on the input file, 2) avoid the need for elf_hash_init() entirely by using mmap(). This speeds up a regular kernel build (100s to 98s for x86_64-defconfig), and potentially dramatically speeds up vmlinux processing. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210506194157.452881700@infradead.org --- tools/objtool/elf.c | 113 +++++++++++++++++----------- tools/objtool/include/objtool/elf.h | 17 +++-- 2 files changed, 83 insertions(+), 47 deletions(-) diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index d08f5f3670f88..a8a0ee21f71a3 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -27,21 +28,27 @@ static inline u32 str_hash(const char *str) return jhash(str, strlen(str), 0); } -static inline int elf_hash_bits(void) -{ - return vmlinux ? ELF_HASH_BITS : 16; -} +#define __elf_table(name) (elf->name##_hash) +#define __elf_bits(name) (elf->name##_bits) -#define elf_hash_add(hashtable, node, key) \ - hlist_add_head(node, &hashtable[hash_min(key, elf_hash_bits())]) +#define elf_hash_add(name, node, key) \ + hlist_add_head(node, &__elf_table(name)[hash_min(key, __elf_bits(name))]) -static void elf_hash_init(struct hlist_head *table) -{ - __hash_init(table, 1U << elf_hash_bits()); -} +#define elf_hash_for_each_possible(name, obj, member, key) \ + hlist_for_each_entry(obj, &__elf_table(name)[hash_min(key, __elf_bits(name))], member) -#define elf_hash_for_each_possible(name, obj, member, key) \ - hlist_for_each_entry(obj, &name[hash_min(key, elf_hash_bits())], member) +#define elf_alloc_hash(name, size) \ +({ \ + __elf_bits(name) = max(10, ilog2(size)); \ + __elf_table(name) = mmap(NULL, sizeof(struct hlist_head) << __elf_bits(name), \ + PROT_READ|PROT_WRITE, \ + MAP_PRIVATE|MAP_ANON, -1, 0); \ + if (__elf_table(name) == (void *)-1L) { \ + WARN("mmap fail " #name); \ + __elf_table(name) = NULL; \ + } \ + __elf_table(name); \ +}) static bool symbol_to_offset(struct rb_node *a, const struct rb_node *b) { @@ -80,9 +87,10 @@ struct section *find_section_by_name(const struct elf *elf, const char *name) { struct section *sec; - elf_hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name)) + elf_hash_for_each_possible(section_name, sec, name_hash, str_hash(name)) { if (!strcmp(sec->name, name)) return sec; + } return NULL; } @@ -92,9 +100,10 @@ static struct section *find_section_by_index(struct elf *elf, { struct section *sec; - elf_hash_for_each_possible(elf->section_hash, sec, hash, idx) + elf_hash_for_each_possible(section, sec, hash, idx) { if (sec->idx == idx) return sec; + } return NULL; } @@ -103,9 +112,10 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx) { struct symbol *sym; - elf_hash_for_each_possible(elf->symbol_hash, sym, hash, idx) + elf_hash_for_each_possible(symbol, sym, hash, idx) { if (sym->idx == idx) return sym; + } return NULL; } @@ -170,9 +180,10 @@ struct symbol *find_symbol_by_name(const struct elf *elf, const char *name) { struct symbol *sym; - elf_hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name)) + elf_hash_for_each_possible(symbol_name, sym, name_hash, str_hash(name)) { if (!strcmp(sym->name, name)) return sym; + } return NULL; } @@ -189,8 +200,8 @@ struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *se sec = sec->reloc; for_offset_range(o, offset, offset + len) { - elf_hash_for_each_possible(elf->reloc_hash, reloc, hash, - sec_offset_hash(sec, o)) { + elf_hash_for_each_possible(reloc, reloc, hash, + sec_offset_hash(sec, o)) { if (reloc->sec != sec) continue; @@ -228,6 +239,10 @@ static int read_sections(struct elf *elf) return -1; } + if (!elf_alloc_hash(section, sections_nr) || + !elf_alloc_hash(section_name, sections_nr)) + return -1; + for (i = 0; i < sections_nr; i++) { sec = malloc(sizeof(*sec)); if (!sec) { @@ -274,12 +289,14 @@ static int read_sections(struct elf *elf) sec->len = sec->sh.sh_size; list_add_tail(&sec->list, &elf->sections); - elf_hash_add(elf->section_hash, &sec->hash, sec->idx); - elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name)); + elf_hash_add(section, &sec->hash, sec->idx); + elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name)); } - if (stats) + if (stats) { printf("nr_sections: %lu\n", (unsigned long)sections_nr); + printf("section_bits: %d\n", elf->section_bits); + } /* sanity check, one more call to elf_nextscn() should return NULL */ if (elf_nextscn(elf->elf, s)) { @@ -308,8 +325,8 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym) else entry = &sym->sec->symbol_list; list_add(&sym->list, entry); - elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx); - elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name)); + elf_hash_add(symbol, &sym->hash, sym->idx); + elf_hash_add(symbol_name, &sym->name_hash, str_hash(sym->name)); /* * Don't store empty STT_NOTYPE symbols in the rbtree. They @@ -329,19 +346,25 @@ static int read_symbols(struct elf *elf) Elf32_Word shndx; symtab = find_section_by_name(elf, ".symtab"); - if (!symtab) { + if (symtab) { + symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); + if (symtab_shndx) + shndx_data = symtab_shndx->data; + + symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize; + } else { /* * A missing symbol table is actually possible if it's an empty - * .o file. This can happen for thunk_64.o. + * .o file. This can happen for thunk_64.o. Make sure to at + * least allocate the symbol hash tables so we can do symbol + * lookups without crashing. */ - return 0; + symbols_nr = 0; } - symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); - if (symtab_shndx) - shndx_data = symtab_shndx->data; - - symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize; + if (!elf_alloc_hash(symbol, symbols_nr) || + !elf_alloc_hash(symbol_name, symbols_nr)) + return -1; for (i = 0; i < symbols_nr; i++) { sym = malloc(sizeof(*sym)); @@ -389,8 +412,10 @@ static int read_symbols(struct elf *elf) elf_add_symbol(elf, sym); } - if (stats) + if (stats) { printf("nr_symbols: %lu\n", (unsigned long)symbols_nr); + printf("symbol_bits: %d\n", elf->symbol_bits); + } /* Create parent/child links for any cold subfunctions */ list_for_each_entry(sec, &elf->sections, list) { @@ -479,7 +504,7 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, reloc->addend = addend; list_add_tail(&reloc->list, &sec->reloc->reloc_list); - elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc)); + elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc)); sec->reloc->changed = true; @@ -556,6 +581,15 @@ static int read_relocs(struct elf *elf) unsigned int symndx; unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0; + sec = find_section_by_name(elf, ".text"); + if (!sec) { + WARN("no .text"); + return -1; + } + + if (!elf_alloc_hash(reloc, sec->len / 16)) + return -1; + list_for_each_entry(sec, &elf->sections, list) { if ((sec->sh.sh_type != SHT_RELA) && (sec->sh.sh_type != SHT_REL)) @@ -600,7 +634,7 @@ static int read_relocs(struct elf *elf) } list_add_tail(&reloc->list, &sec->reloc_list); - elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc)); + elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc)); nr_reloc++; } @@ -611,6 +645,7 @@ static int read_relocs(struct elf *elf) if (stats) { printf("max_reloc: %lu\n", max_reloc); printf("tot_reloc: %lu\n", tot_reloc); + printf("reloc_bits: %d\n", elf->reloc_bits); } return 0; @@ -632,12 +667,6 @@ struct elf *elf_open_read(const char *name, int flags) INIT_LIST_HEAD(&elf->sections); - elf_hash_init(elf->symbol_hash); - elf_hash_init(elf->symbol_name_hash); - elf_hash_init(elf->section_hash); - elf_hash_init(elf->section_name_hash); - elf_hash_init(elf->reloc_hash); - elf->fd = open(name, flags); if (elf->fd == -1) { fprintf(stderr, "objtool: Can't open '%s': %s\n", @@ -850,8 +879,8 @@ struct section *elf_create_section(struct elf *elf, const char *name, return NULL; list_add_tail(&sec->list, &elf->sections); - elf_hash_add(elf->section_hash, &sec->hash, sec->idx); - elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name)); + elf_hash_add(section, &sec->hash, sec->idx); + elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name)); elf->changed = true; diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 45e5ede363b07..90082751f851d 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -84,11 +84,18 @@ struct elf { bool changed; char *name; struct list_head sections; - DECLARE_HASHTABLE(symbol_hash, ELF_HASH_BITS); - DECLARE_HASHTABLE(symbol_name_hash, ELF_HASH_BITS); - DECLARE_HASHTABLE(section_hash, ELF_HASH_BITS); - DECLARE_HASHTABLE(section_name_hash, ELF_HASH_BITS); - DECLARE_HASHTABLE(reloc_hash, ELF_HASH_BITS); + + int symbol_bits; + int symbol_name_bits; + int section_bits; + int section_name_bits; + int reloc_bits; + + struct hlist_head *symbol_hash; + struct hlist_head *symbol_name_hash; + struct hlist_head *section_hash; + struct hlist_head *section_name_hash; + struct hlist_head *reloc_hash; }; #define OFFSET_STRIDE_BITS 4 -- GitLab From 80870e6ece78ce67b91398db88fb6b92a178f574 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 May 2021 21:33:54 +0200 Subject: [PATCH 0481/3804] x86, objtool: Dont exclude arch/x86/realmode/ Specifically, init.c uses jump_labels. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210506194157.516200011@infradead.org --- arch/x86/realmode/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/realmode/Makefile b/arch/x86/realmode/Makefile index 6b1f3a4eeb44e..a0b491ae2de8d 100644 --- a/arch/x86/realmode/Makefile +++ b/arch/x86/realmode/Makefile @@ -10,7 +10,6 @@ # Sanitizer runtimes are unavailable and cannot be linked here. KASAN_SANITIZE := n KCSAN_SANITIZE := n -OBJECT_FILES_NON_STANDARD := y subdir- := rm -- GitLab From 8bfafcdccb52e770695b12530b1f800fe98b16b1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 May 2021 21:33:55 +0200 Subject: [PATCH 0482/3804] jump_label, x86: Strip ASM jump_label support In prepration for variable size jump_label support; remove all ASM bits, which are currently unused. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210506194157.599716762@infradead.org --- arch/x86/include/asm/jump_label.h | 36 ------------------------------- 1 file changed, 36 deletions(-) diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 610a05374c02f..01de21e2d9679 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -47,42 +47,6 @@ l_yes: return true; } -#else /* __ASSEMBLY__ */ - -.macro STATIC_JUMP_IF_TRUE target, key, def -.Lstatic_jump_\@: - .if \def - /* Equivalent to "jmp.d32 \target" */ - .byte 0xe9 - .long \target - .Lstatic_jump_after_\@ -.Lstatic_jump_after_\@: - .else - .byte BYTES_NOP5 - .endif - .pushsection __jump_table, "aw" - _ASM_ALIGN - .long .Lstatic_jump_\@ - ., \target - . - _ASM_PTR \key - . - .popsection -.endm - -.macro STATIC_JUMP_IF_FALSE target, key, def -.Lstatic_jump_\@: - .if \def - .byte BYTES_NOP5 - .else - /* Equivalent to "jmp.d32 \target" */ - .byte 0xe9 - .long \target - .Lstatic_jump_after_\@ -.Lstatic_jump_after_\@: - .endif - .pushsection __jump_table, "aw" - _ASM_ALIGN - .long .Lstatic_jump_\@ - ., \target - . - _ASM_PTR \key + 1 - . - .popsection -.endm - #endif /* __ASSEMBLY__ */ #endif -- GitLab From e1aa35c4c4bc71e44dabc9d7d167b807edd7b439 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 May 2021 21:33:56 +0200 Subject: [PATCH 0483/3804] jump_label, x86: Factor out the __jump_table generation Both arch_static_branch() and arch_static_branch_jump() have the same blurb to generate the __jump_table entry, share it. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210506194157.663132781@infradead.org --- arch/x86/include/asm/jump_label.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index 01de21e2d9679..dfdc2b1c17ddd 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -14,15 +14,19 @@ #include #include +#define JUMP_TABLE_ENTRY \ + ".pushsection __jump_table, \"aw\" \n\t" \ + _ASM_ALIGN "\n\t" \ + ".long 1b - . \n\t" \ + ".long %l[l_yes] - . \n\t" \ + _ASM_PTR "%c0 + %c1 - .\n\t" \ + ".popsection \n\t" + static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { asm_volatile_goto("1:" ".byte " __stringify(BYTES_NOP5) "\n\t" - ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - ".long 1b - ., %l[l_yes] - . \n\t" - _ASM_PTR "%c0 + %c1 - .\n\t" - ".popsection \n\t" + JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); return false; @@ -33,13 +37,9 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { asm_volatile_goto("1:" - ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t" - "2:\n\t" - ".pushsection __jump_table, \"aw\" \n\t" - _ASM_ALIGN "\n\t" - ".long 1b - ., %l[l_yes] - . \n\t" - _ASM_PTR "%c0 + %c1 - .\n\t" - ".popsection \n\t" + ".byte 0xe9 \n\t" + ".long %l[l_yes] - (. + 4) \n\t" + JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); return false; -- GitLab From f9510fa9caaf8229381d5f86ba0774bf1a6ca39b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 May 2021 21:33:57 +0200 Subject: [PATCH 0484/3804] jump_label, x86: Improve error when we fail expected text There is only a single usage site left, remove the function and extend the print to include more information, like the expected text and the patch type. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210506194157.726939027@infradead.org --- arch/x86/kernel/jump_label.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 6a2eb62c85e62..638d3b9be0ad2 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -16,37 +16,32 @@ #include #include -static void bug_at(const void *ip, int line) -{ - /* - * The location is not an op that we were expecting. - * Something went wrong. Crash the box, as something could be - * corrupting the kernel. - */ - pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph) %d\n", ip, ip, ip, line); - BUG(); -} - static const void * __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type) { const void *expect, *code; const void *addr, *dest; - int line; addr = (void *)jump_entry_code(entry); dest = (void *)jump_entry_target(entry); code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest); - if (type == JUMP_LABEL_JMP) { - expect = x86_nops[5]; line = __LINE__; - } else { - expect = code; line = __LINE__; - } + if (type == JUMP_LABEL_JMP) + expect = x86_nops[5]; + else + expect = code; - if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE)) - bug_at(addr, line); + if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE)) { + /* + * The location is not an op that we were expecting. + * Something went wrong. Crash the box, as something could be + * corrupting the kernel. + */ + pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph != %5ph)) type:%d\n", + addr, addr, addr, expect, type); + BUG(); + } if (type == JUMP_LABEL_NOP) code = x86_nops[5]; -- GitLab From fa5e5dc39669b4427830c546ede8709323b8276c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 May 2021 21:33:58 +0200 Subject: [PATCH 0485/3804] jump_label, x86: Introduce jump_entry_size() This allows architectures to have variable sized jumps. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210506194157.786777050@infradead.org --- arch/x86/include/asm/jump_label.h | 4 ++-- arch/x86/kernel/jump_label.c | 7 +++++++ include/linux/jump_label.h | 9 +++++++++ kernel/jump_label.c | 2 +- 4 files changed, 19 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index dfdc2b1c17ddd..d85802a006296 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -4,8 +4,6 @@ #define HAVE_JUMP_LABEL_BATCH -#define JUMP_LABEL_NOP_SIZE 5 - #include #include @@ -47,6 +45,8 @@ l_yes: return true; } +extern int arch_jump_entry_size(struct jump_entry *entry); + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 638d3b9be0ad2..a29eecc14c94d 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -16,6 +16,13 @@ #include #include +#define JUMP_LABEL_NOP_SIZE JMP32_INSN_SIZE + +int arch_jump_entry_size(struct jump_entry *entry) +{ + return JMP32_INSN_SIZE; +} + static const void * __jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type) { diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 05f5554d860f5..8c45f58292ac4 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -176,6 +176,15 @@ static inline void jump_entry_set_init(struct jump_entry *entry) entry->key |= 2; } +static inline int jump_entry_size(struct jump_entry *entry) +{ +#ifdef JUMP_LABEL_NOP_SIZE + return JUMP_LABEL_NOP_SIZE; +#else + return arch_jump_entry_size(entry); +#endif +} + #endif #endif diff --git a/kernel/jump_label.c b/kernel/jump_label.c index ba39fbb1f8e73..521cafcfcb69b 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -309,7 +309,7 @@ EXPORT_SYMBOL_GPL(jump_label_rate_limit); static int addr_conflict(struct jump_entry *entry, void *start, void *end) { if (jump_entry_code(entry) <= (unsigned long)end && - jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE > (unsigned long)start) + jump_entry_code(entry) + jump_entry_size(entry) > (unsigned long)start) return 1; return 0; -- GitLab From 001951bea748d3f675e1778f42b17290a8c551bf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 May 2021 21:33:59 +0200 Subject: [PATCH 0486/3804] jump_label, x86: Add variable length patching support This allows the patching to to emit 2 byte JMP/NOP instruction in addition to the 5 byte JMP/NOP we already did. This allows for more compact code. This code is not yet used, as we don't emit shorter code at compile time yet. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210506194157.846870383@infradead.org --- arch/x86/kernel/jump_label.c | 53 ++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index a29eecc14c94d..190d810fa435d 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -23,44 +23,63 @@ int arch_jump_entry_size(struct jump_entry *entry) return JMP32_INSN_SIZE; } -static const void * -__jump_label_set_jump_code(struct jump_entry *entry, enum jump_label_type type) +struct jump_label_patch { + const void *code; + int size; +}; + +static struct jump_label_patch +__jump_label_patch(struct jump_entry *entry, enum jump_label_type type) { - const void *expect, *code; + const void *expect, *code, *nop; const void *addr, *dest; + int size; addr = (void *)jump_entry_code(entry); dest = (void *)jump_entry_target(entry); - code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest); + size = arch_jump_entry_size(entry); + switch (size) { + case JMP8_INSN_SIZE: + code = text_gen_insn(JMP8_INSN_OPCODE, addr, dest); + nop = x86_nops[size]; + break; + + case JMP32_INSN_SIZE: + code = text_gen_insn(JMP32_INSN_OPCODE, addr, dest); + nop = x86_nops[size]; + break; + + default: BUG(); + } if (type == JUMP_LABEL_JMP) - expect = x86_nops[5]; + expect = nop; else expect = code; - if (memcmp(addr, expect, JUMP_LABEL_NOP_SIZE)) { + if (memcmp(addr, expect, size)) { /* * The location is not an op that we were expecting. * Something went wrong. Crash the box, as something could be * corrupting the kernel. */ - pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph != %5ph)) type:%d\n", - addr, addr, addr, expect, type); + pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph != %5ph)) size:%d type:%d\n", + addr, addr, addr, expect, size, type); BUG(); } if (type == JUMP_LABEL_NOP) - code = x86_nops[5]; + code = nop; - return code; + return (struct jump_label_patch){.code = code, .size = size}; } static inline void __jump_label_transform(struct jump_entry *entry, enum jump_label_type type, int init) { - const void *opcode = __jump_label_set_jump_code(entry, type); + const struct jump_label_patch jlp = __jump_label_patch(entry, type); /* * As long as only a single processor is running and the code is still @@ -74,12 +93,11 @@ static inline void __jump_label_transform(struct jump_entry *entry, * always nop being the 'currently valid' instruction */ if (init || system_state == SYSTEM_BOOTING) { - text_poke_early((void *)jump_entry_code(entry), opcode, - JUMP_LABEL_NOP_SIZE); + text_poke_early((void *)jump_entry_code(entry), jlp.code, jlp.size); return; } - text_poke_bp((void *)jump_entry_code(entry), opcode, JUMP_LABEL_NOP_SIZE, NULL); + text_poke_bp((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL); } static void __ref jump_label_transform(struct jump_entry *entry, @@ -100,7 +118,7 @@ void arch_jump_label_transform(struct jump_entry *entry, bool arch_jump_label_transform_queue(struct jump_entry *entry, enum jump_label_type type) { - const void *opcode; + struct jump_label_patch jlp; if (system_state == SYSTEM_BOOTING) { /* @@ -111,9 +129,8 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry, } mutex_lock(&text_mutex); - opcode = __jump_label_set_jump_code(entry, type); - text_poke_queue((void *)jump_entry_code(entry), - opcode, JUMP_LABEL_NOP_SIZE, NULL); + jlp = __jump_label_patch(entry, type); + text_poke_queue((void *)jump_entry_code(entry), jlp.code, jlp.size, NULL); mutex_unlock(&text_mutex); return true; } -- GitLab From 5af0ea293d78c8b8f0b87ae2b13f7ac584057bc3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 May 2021 21:34:00 +0200 Subject: [PATCH 0487/3804] jump_label: Free jump_entry::key bit1 for build use Have jump_label_init() set jump_entry::key bit1 to either 0 ot 1 unconditionally. This makes it available for build-time games. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210506194157.906893264@infradead.org --- include/linux/jump_label.h | 7 +++++-- kernel/jump_label.c | 10 ++++++---- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 8c45f58292ac4..48b9b2a82767d 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -171,9 +171,12 @@ static inline bool jump_entry_is_init(const struct jump_entry *entry) return (unsigned long)entry->key & 2UL; } -static inline void jump_entry_set_init(struct jump_entry *entry) +static inline void jump_entry_set_init(struct jump_entry *entry, bool set) { - entry->key |= 2; + if (set) + entry->key |= 2; + else + entry->key &= ~2; } static inline int jump_entry_size(struct jump_entry *entry) diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 521cafcfcb69b..bdb0681bece82 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -483,13 +483,14 @@ void __init jump_label_init(void) for (iter = iter_start; iter < iter_stop; iter++) { struct static_key *iterk; + bool in_init; /* rewrite NOPs */ if (jump_label_type(iter) == JUMP_LABEL_NOP) arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); - if (init_section_contains((void *)jump_entry_code(iter), 1)) - jump_entry_set_init(iter); + in_init = init_section_contains((void *)jump_entry_code(iter), 1); + jump_entry_set_init(iter, in_init); iterk = jump_entry_key(iter); if (iterk == key) @@ -634,9 +635,10 @@ static int jump_label_add_module(struct module *mod) for (iter = iter_start; iter < iter_stop; iter++) { struct static_key *iterk; + bool in_init; - if (within_module_init(jump_entry_code(iter), mod)) - jump_entry_set_init(iter); + in_init = within_module_init(jump_entry_code(iter), mod); + jump_entry_set_init(iter, in_init); iterk = jump_entry_key(iter); if (iterk == key) -- GitLab From e7bf1ba97afdde75b0ef43e4bdb718bf843613f1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 May 2021 21:34:01 +0200 Subject: [PATCH 0488/3804] jump_label, x86: Emit short JMP Now that we can patch short JMP/NOP, allow the compiler/assembler to emit short JMP instructions. There is no way to have the assembler emit short NOPs based on the potential displacement, so leave those long for now. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210506194157.967034497@infradead.org --- arch/x86/include/asm/jump_label.h | 3 +-- arch/x86/kernel/jump_label.c | 8 +++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index d85802a006296..ef819e33cfc16 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -35,8 +35,7 @@ l_yes: static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { asm_volatile_goto("1:" - ".byte 0xe9 \n\t" - ".long %l[l_yes] - (. + 4) \n\t" + "jmp %l[l_yes]\n\t" JUMP_TABLE_ENTRY : : "i" (key), "i" (branch) : : l_yes); diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index 190d810fa435d..a762dc1c615eb 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -15,12 +15,18 @@ #include #include #include +#include #define JUMP_LABEL_NOP_SIZE JMP32_INSN_SIZE int arch_jump_entry_size(struct jump_entry *entry) { - return JMP32_INSN_SIZE; + struct insn insn = {}; + + insn_decode_kernel(&insn, (void *)jump_entry_code(entry)); + BUG_ON(insn.length != 2 && insn.length != 5); + + return insn.length; } struct jump_label_patch { -- GitLab From cbf82a3dc241aea82b941a872ed5c52f6af527ea Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 May 2021 21:34:02 +0200 Subject: [PATCH 0489/3804] objtool: Decode jump_entry::key addend Teach objtool about the the low bits in the struct static_key pointer. That is, the low two bits of @key in: struct jump_entry { s32 code; s32 target; long key; } as found in the __jump_table section. Since @key has a relocation to the variable (to be resolved by the linker), the low two bits will be reflected in the relocation's addend. As such, find the reloc and store the addend, such that we can access these bits. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210506194158.028024143@infradead.org --- tools/objtool/arch/x86/include/arch/special.h | 1 + tools/objtool/include/objtool/special.h | 1 + tools/objtool/special.c | 14 ++++++++++++++ 3 files changed, 16 insertions(+) diff --git a/tools/objtool/arch/x86/include/arch/special.h b/tools/objtool/arch/x86/include/arch/special.h index 14271cca0c740..f2918f789a0a3 100644 --- a/tools/objtool/arch/x86/include/arch/special.h +++ b/tools/objtool/arch/x86/include/arch/special.h @@ -9,6 +9,7 @@ #define JUMP_ENTRY_SIZE 16 #define JUMP_ORIG_OFFSET 0 #define JUMP_NEW_OFFSET 4 +#define JUMP_KEY_OFFSET 8 #define ALT_ENTRY_SIZE 12 #define ALT_ORIG_OFFSET 0 diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h index 8a09f4e9d480e..dc4721e190023 100644 --- a/tools/objtool/include/objtool/special.h +++ b/tools/objtool/include/objtool/special.h @@ -27,6 +27,7 @@ struct special_alt { unsigned long new_off; unsigned int orig_len, new_len; /* group only */ + u8 key_addend; }; int special_get_alts(struct elf *elf, struct list_head *alts); diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 07b21cfabf5c0..bc925cf19e2de 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -23,6 +23,7 @@ struct special_entry { unsigned char size, orig, new; unsigned char orig_len, new_len; /* group only */ unsigned char feature; /* ALTERNATIVE macro CPU feature */ + unsigned char key; /* jump_label key */ }; struct special_entry entries[] = { @@ -42,6 +43,7 @@ struct special_entry entries[] = { .size = JUMP_ENTRY_SIZE, .orig = JUMP_ORIG_OFFSET, .new = JUMP_NEW_OFFSET, + .key = JUMP_KEY_OFFSET, }, { .sec = "__ex_table", @@ -122,6 +124,18 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, alt->new_off -= 0x7ffffff0; } + if (entry->key) { + struct reloc *key_reloc; + + key_reloc = find_reloc_by_dest(elf, sec, offset + entry->key); + if (!key_reloc) { + WARN_FUNC("can't find key reloc", + sec, offset + entry->key); + return -1; + } + alt->key_addend = key_reloc->addend; + } + return 0; } -- GitLab From 6d37b83c5d79ef5996cc49c3e3ac3d8ecd8c7050 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 May 2021 21:34:03 +0200 Subject: [PATCH 0490/3804] objtool: Rewrite jump_label instructions When a jump_entry::key has bit1 set, rewrite the instruction to be a NOP. This allows the compiler/assembler to emit JMP (and thus decide on which encoding to use). Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210506194158.091028792@infradead.org --- tools/objtool/check.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 9ed1a4cd00dc0..98cf87f2c5019 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1234,6 +1234,20 @@ static int handle_jump_alt(struct objtool_file *file, return -1; } + if (special_alt->key_addend & 2) { + struct reloc *reloc = insn_reloc(file, orig_insn); + + if (reloc) { + reloc->type = R_NONE; + elf_write_reloc(file->elf, reloc); + } + elf_write_insn(file->elf, orig_insn->sec, + orig_insn->offset, orig_insn->len, + arch_nop_insn(orig_insn->len)); + orig_insn->type = INSN_NOP; + return 0; + } + *new_insn = list_next_entry(orig_insn, list); return 0; } -- GitLab From e2d9494beff21a26438eb611c260b8a6c2dc4dbf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 May 2021 21:34:04 +0200 Subject: [PATCH 0491/3804] objtool: Provide stats for jump_labels Add objtool --stats to count the jump_label sites it encounters. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210506194158.153101906@infradead.org --- tools/objtool/check.c | 22 ++++++++++++++++++++-- tools/objtool/include/objtool/objtool.h | 3 +++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 98cf87f2c5019..2c6a93edf27ec 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1225,8 +1225,15 @@ static int handle_jump_alt(struct objtool_file *file, struct instruction *orig_insn, struct instruction **new_insn) { - if (orig_insn->type == INSN_NOP) + if (orig_insn->type == INSN_NOP) { +do_nop: + if (orig_insn->len == 2) + file->jl_nop_short++; + else + file->jl_nop_long++; + return 0; + } if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) { WARN_FUNC("unsupported instruction at jump label", @@ -1245,9 +1252,14 @@ static int handle_jump_alt(struct objtool_file *file, orig_insn->offset, orig_insn->len, arch_nop_insn(orig_insn->len)); orig_insn->type = INSN_NOP; - return 0; + goto do_nop; } + if (orig_insn->len == 2) + file->jl_short++; + else + file->jl_long++; + *new_insn = list_next_entry(orig_insn, list); return 0; } @@ -1328,6 +1340,12 @@ static int add_special_section_alts(struct objtool_file *file) free(special_alt); } + if (stats) { + printf("jl\\\tNOP\tJMP\n"); + printf("short:\t%ld\t%ld\n", file->jl_nop_short, file->jl_short); + printf("long:\t%ld\t%ld\n", file->jl_nop_long, file->jl_long); + } + out: return ret; } diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index e4084afb2304b..24fa83634de4d 100644 --- a/tools/objtool/include/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -22,6 +22,9 @@ struct objtool_file { struct list_head static_call_list; struct list_head mcount_loc_list; bool ignore_unreachables, c_file, hints, rodata; + + unsigned long jl_short, jl_long; + unsigned long jl_nop_short, jl_nop_long; }; struct objtool_file *objtool_open_read(const char *_objname); -- GitLab From ab3257042c26d0cd44793c741e2f89bf38b21fe8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 May 2021 21:34:05 +0200 Subject: [PATCH 0492/3804] jump_label, x86: Allow short NOPs Now that objtool is able to rewrite jump_label instructions, have the compiler emit a JMP, such that it can decide on the optimal encoding, and set jump_entry::key bit1 to indicate that objtool should rewrite the instruction to a matching NOP. For x86_64-allyesconfig this gives: jl\ NOP JMP short: 22997 124 long: 30874 90 IOW, we save (22997+124) * 3 bytes of kernel text in hotpaths. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20210506194158.216763632@infradead.org --- arch/x86/include/asm/jump_label.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h index ef819e33cfc16..0449b125d27f3 100644 --- a/arch/x86/include/asm/jump_label.h +++ b/arch/x86/include/asm/jump_label.h @@ -20,6 +20,22 @@ _ASM_PTR "%c0 + %c1 - .\n\t" \ ".popsection \n\t" +#ifdef CONFIG_STACK_VALIDATION + +static __always_inline bool arch_static_branch(struct static_key *key, bool branch) +{ + asm_volatile_goto("1:" + "jmp %l[l_yes] # objtool NOPs this \n\t" + JUMP_TABLE_ENTRY + : : "i" (key), "i" (2 | branch) : : l_yes); + + return false; +l_yes: + return true; +} + +#else + static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch) { asm_volatile_goto("1:" @@ -32,6 +48,8 @@ l_yes: return true; } +#endif /* STACK_VALIDATION */ + static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch) { asm_volatile_goto("1:" -- GitLab From 7ea96eefb0097d243af62fc672be9f17b10338b3 Mon Sep 17 00:00:00 2001 From: Paolo Valente Date: Wed, 12 May 2021 11:43:52 +0200 Subject: [PATCH 0493/3804] block, bfq: avoid circular stable merges BFQ may merge a new bfq_queue, stably, with the last bfq_queue created. In particular, BFQ first waits a little bit for some I/O to flow inside the new queue, say Q2, if this is needed to understand whether it is better or worse to merge Q2 with the last queue created, say Q1. This delayed stable merge is performed by assigning bic->stable_merge_bfqq = Q1, for the bic associated with Q1. Yet, while waiting for some I/O to flow in Q2, a non-stable queue merge of Q2 with Q1 may happen, causing the bic previously associated with Q2 to be associated with exactly Q1 (bic->bfqq = Q1). After that, Q2 and Q1 may happen to be split, and, in the split, Q1 may happen to be recycled as a non-shared bfq_queue. In that case, Q1 may then happen to undergo a stable merge with the bfq_queue pointed by bic->stable_merge_bfqq. Yet bic->stable_merge_bfqq still points to Q1. So Q1 would be merged with itself. This commit fixes this error by intercepting this situation, and canceling the schedule of the stable merge. Fixes: 430a67f9d616 ("block, bfq: merge bursts of newly-created queues") Signed-off-by: Pietro Pedroni Signed-off-by: Paolo Valente Link: https://lore.kernel.org/r/20210512094352.85545-2-paolo.valente@linaro.org Signed-off-by: Jens Axboe --- block/bfq-iosched.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 59b2499d3f8be..acd1f881273e0 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -372,9 +372,38 @@ struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync) return bic->bfqq[is_sync]; } +static void bfq_put_stable_ref(struct bfq_queue *bfqq); + void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync) { + /* + * If bfqq != NULL, then a non-stable queue merge between + * bic->bfqq and bfqq is happening here. This causes troubles + * in the following case: bic->bfqq has also been scheduled + * for a possible stable merge with bic->stable_merge_bfqq, + * and bic->stable_merge_bfqq == bfqq happens to + * hold. Troubles occur because bfqq may then undergo a split, + * thereby becoming eligible for a stable merge. Yet, if + * bic->stable_merge_bfqq points exactly to bfqq, then bfqq + * would be stably merged with itself. To avoid this anomaly, + * we cancel the stable merge if + * bic->stable_merge_bfqq == bfqq. + */ bic->bfqq[is_sync] = bfqq; + + if (bfqq && bic->stable_merge_bfqq == bfqq) { + /* + * Actually, these same instructions are executed also + * in bfq_setup_cooperator, in case of abort or actual + * execution of a stable merge. We could avoid + * repeating these instructions there too, but if we + * did so, we would nest even more complexity in this + * function. + */ + bfq_put_stable_ref(bic->stable_merge_bfqq); + + bic->stable_merge_bfqq = NULL; + } } struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic) @@ -2630,8 +2659,6 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd, struct bfq_queue *bfqq); -static void bfq_put_stable_ref(struct bfq_queue *bfqq); - /* * Attempt to schedule a merge of bfqq with the currently in-service * queue or with a close queue among the scheduled queues. Return -- GitLab From 190515f610946db025cdedebde93958b725fb583 Mon Sep 17 00:00:00 2001 From: Lin Feng Date: Wed, 12 May 2021 18:01:24 +0800 Subject: [PATCH 0494/3804] blkdev.h: remove unused codes blk_account_rq Last users of blk_account_rq gone with patch commit a1ce35fa49852db ("block: remove dead elevator code") and now it gets no caller, it can be safely removed. Signed-off-by: Lin Feng Link: https://lore.kernel.org/r/20210512100124.173769-1-linf@wangsu.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b91ba6207365b..26c3e368656f0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -677,11 +677,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); -static inline bool blk_account_rq(struct request *rq) -{ - return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); -} - #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) -- GitLab From 2404b8747019184002823dba7d2f0ecf89d802b7 Mon Sep 17 00:00:00 2001 From: Sumeet Pawnikar Date: Tue, 11 May 2021 23:31:42 +0530 Subject: [PATCH 0495/3804] ACPI: PM: Add ACPI ID of Alder Lake Fan Add a new unique fan ACPI device ID for Alder Lake to support it in acpi_dev_pm_attach() function. Fixes: 38748bcb940e ("ACPI: DPTF: Support Alder Lake") Signed-off-by: Sumeet Pawnikar Acked-by: Zhang Rui Cc: 5.10+ # 5.10+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/device_pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 096153761ebc3..58876248b1921 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -1310,6 +1310,7 @@ int acpi_dev_pm_attach(struct device *dev, bool power_on) {"PNP0C0B", }, /* Generic ACPI fan */ {"INT3404", }, /* Fan */ {"INTC1044", }, /* Fan for Tiger Lake generation */ + {"INTC1048", }, /* Fan for Alder Lake generation */ {} }; struct acpi_device *adev = ACPI_COMPANION(dev); -- GitLab From f395183f9544ba2f56b25938d6ea7042bd873521 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 12 May 2021 07:38:00 -0700 Subject: [PATCH 0496/3804] f2fs: return EINVAL for hole cases in swap file This tries to fix xfstests/generic/495. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 41e260680b27c..009a09fb9d88c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3896,7 +3896,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis, /* hole */ if (!(map.m_flags & F2FS_MAP_FLAGS)) { f2fs_err(sbi, "Swapfile has holes\n"); - ret = -ENOENT; + ret = -EINVAL; goto out; } @@ -4052,7 +4052,7 @@ out: return ret; bad_bmap: f2fs_err(sbi, "Swapfile has holes\n"); - return -ENOENT; + return -EINVAL; } static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, -- GitLab From 79ebe9110fa458d58f1fceb078e2068d7ad37390 Mon Sep 17 00:00:00 2001 From: Sun Ke Date: Wed, 12 May 2021 19:43:30 +0800 Subject: [PATCH 0497/3804] nbd: Fix NULL pointer in flush_workqueue Open /dev/nbdX first, the config_refs will be 1 and the pointers in nbd_device are still null. Disconnect /dev/nbdX, then reference a null recv_workq. The protection by config_refs in nbd_genl_disconnect is useless. [ 656.366194] BUG: kernel NULL pointer dereference, address: 0000000000000020 [ 656.368943] #PF: supervisor write access in kernel mode [ 656.369844] #PF: error_code(0x0002) - not-present page [ 656.370717] PGD 10cc87067 P4D 10cc87067 PUD 1074b4067 PMD 0 [ 656.371693] Oops: 0002 [#1] SMP [ 656.372242] CPU: 5 PID: 7977 Comm: nbd-client Not tainted 5.11.0-rc5-00040-g76c057c84d28 #1 [ 656.373661] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20190727_073836-buildvm-ppc64le-16.ppc.fedoraproject.org-3.fc31 04/01/2014 [ 656.375904] RIP: 0010:mutex_lock+0x29/0x60 [ 656.376627] Code: 00 0f 1f 44 00 00 55 48 89 fd 48 83 05 6f d7 fe 08 01 e8 7a c3 ff ff 48 83 05 6a d7 fe 08 01 31 c0 65 48 8b 14 25 00 6d 01 00 48 0f b1 55 d [ 656.378934] RSP: 0018:ffffc900005eb9b0 EFLAGS: 00010246 [ 656.379350] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 656.379915] RDX: ffff888104cf2600 RSI: ffffffffaae8f452 RDI: 0000000000000020 [ 656.380473] RBP: 0000000000000020 R08: 0000000000000000 R09: ffff88813bd6b318 [ 656.381039] R10: 00000000000000c7 R11: fefefefefefefeff R12: ffff888102710b40 [ 656.381599] R13: ffffc900005eb9e0 R14: ffffffffb2930680 R15: ffff88810770ef00 [ 656.382166] FS: 00007fdf117ebb40(0000) GS:ffff88813bd40000(0000) knlGS:0000000000000000 [ 656.382806] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 656.383261] CR2: 0000000000000020 CR3: 0000000100c84000 CR4: 00000000000006e0 [ 656.383819] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 656.384370] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 656.384927] Call Trace: [ 656.385111] flush_workqueue+0x92/0x6c0 [ 656.385395] nbd_disconnect_and_put+0x81/0xd0 [ 656.385716] nbd_genl_disconnect+0x125/0x2a0 [ 656.386034] genl_family_rcv_msg_doit.isra.0+0x102/0x1b0 [ 656.386422] genl_rcv_msg+0xfc/0x2b0 [ 656.386685] ? nbd_ioctl+0x490/0x490 [ 656.386954] ? genl_family_rcv_msg_doit.isra.0+0x1b0/0x1b0 [ 656.387354] netlink_rcv_skb+0x62/0x180 [ 656.387638] genl_rcv+0x34/0x60 [ 656.387874] netlink_unicast+0x26d/0x590 [ 656.388162] netlink_sendmsg+0x398/0x6c0 [ 656.388451] ? netlink_rcv_skb+0x180/0x180 [ 656.388750] ____sys_sendmsg+0x1da/0x320 [ 656.389038] ? ____sys_recvmsg+0x130/0x220 [ 656.389334] ___sys_sendmsg+0x8e/0xf0 [ 656.389605] ? ___sys_recvmsg+0xa2/0xf0 [ 656.389889] ? handle_mm_fault+0x1671/0x21d0 [ 656.390201] __sys_sendmsg+0x6d/0xe0 [ 656.390464] __x64_sys_sendmsg+0x23/0x30 [ 656.390751] do_syscall_64+0x45/0x70 [ 656.391017] entry_SYSCALL_64_after_hwframe+0x44/0xa9 To fix it, just add if (nbd->recv_workq) to nbd_disconnect_and_put(). Fixes: e9e006f5fcf2 ("nbd: fix max number of supported devs") Signed-off-by: Sun Ke Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20210512114331.1233964-2-sunke32@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 4ff71b579cfcc..974da561b8e5e 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1980,7 +1980,8 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd) * config ref and try to destroy the workqueue from inside the work * queue. */ - flush_workqueue(nbd->recv_workq); + if (nbd->recv_workq) + flush_workqueue(nbd->recv_workq); if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF, &nbd->config->runtime_flags)) nbd_config_put(nbd); -- GitLab From bedf78c4cbbbb65e42ede5ca2bd21887ef5b7060 Mon Sep 17 00:00:00 2001 From: Sun Ke Date: Wed, 12 May 2021 19:43:31 +0800 Subject: [PATCH 0498/3804] nbd: share nbd_put and return by goto put_nbd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the following two statements by the statement “goto put_nbd;” nbd_put(nbd); return 0; Signed-off-by: Sun Ke Suggested-by: Markus Elfring Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20210512114331.1233964-3-sunke32@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 974da561b8e5e..45d2c28c8fc83 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -2015,12 +2015,11 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } mutex_unlock(&nbd_index_mutex); - if (!refcount_inc_not_zero(&nbd->config_refs)) { - nbd_put(nbd); - return 0; - } + if (!refcount_inc_not_zero(&nbd->config_refs)) + goto put_nbd; nbd_disconnect_and_put(nbd); nbd_config_put(nbd); +put_nbd: nbd_put(nbd); return 0; } -- GitLab From f8c8871f5eff3981eeb13421aca2c1cfda4a5204 Mon Sep 17 00:00:00 2001 From: Peter Geis Date: Tue, 11 May 2021 17:13:33 -0400 Subject: [PATCH 0499/3804] regulator: fan53555: fix TCS4525 voltage calulation The TCS4525 has 128 voltage steps. With the calculation set to 127 the most significant bit is disregarded which leads to a miscalculation of the voltage by about 200mv. Fix the calculation to end deadlock on the rk3566-quartz64 which uses this as the cpu regulator. Fixes: 914df8faa7d6 ("regulator: fan53555: Add TCS4525 DCDC support") Signed-off-by: Peter Geis Link: https://lore.kernel.org/r/20210511211335.2935163-2-pgwipeout@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/fan53555.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c index f3918f03aaf3d..26f06f685b1b6 100644 --- a/drivers/regulator/fan53555.c +++ b/drivers/regulator/fan53555.c @@ -55,7 +55,6 @@ #define FAN53555_NVOLTAGES 64 /* Numbers of voltages */ #define FAN53526_NVOLTAGES 128 -#define TCS4525_NVOLTAGES 127 /* Numbers of voltages */ #define TCS_VSEL_NSEL_MASK 0x7f #define TCS_VSEL0_MODE (1 << 7) @@ -376,7 +375,7 @@ static int fan53555_voltages_setup_tcs(struct fan53555_device_info *di) /* Init voltage range and step */ di->vsel_min = 600000; di->vsel_step = 6250; - di->vsel_count = TCS4525_NVOLTAGES; + di->vsel_count = FAN53526_NVOLTAGES; return 0; } -- GitLab From d4db69eba290732357f03ba0a14350b81f778290 Mon Sep 17 00:00:00 2001 From: Peter Geis Date: Tue, 11 May 2021 17:13:33 -0400 Subject: [PATCH 0500/3804] regulator: fan53555: fix TCS4525 voltage calulation The TCS4525 has 128 voltage steps. With the calculation set to 127 the most significant bit is disregarded which leads to a miscalculation of the voltage by about 200mv. Fix the calculation to end deadlock on the rk3566-quartz64 which uses this as the cpu regulator. Fixes: 914df8faa7d6 ("regulator: fan53555: Add TCS4525 DCDC support") Signed-off-by: Peter Geis Link: https://lore.kernel.org/r/20210511211335.2935163-2-pgwipeout@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/fan53555.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c index f3918f03aaf3d..26f06f685b1b6 100644 --- a/drivers/regulator/fan53555.c +++ b/drivers/regulator/fan53555.c @@ -55,7 +55,6 @@ #define FAN53555_NVOLTAGES 64 /* Numbers of voltages */ #define FAN53526_NVOLTAGES 128 -#define TCS4525_NVOLTAGES 127 /* Numbers of voltages */ #define TCS_VSEL_NSEL_MASK 0x7f #define TCS_VSEL0_MODE (1 << 7) @@ -376,7 +375,7 @@ static int fan53555_voltages_setup_tcs(struct fan53555_device_info *di) /* Init voltage range and step */ di->vsel_min = 600000; di->vsel_step = 6250; - di->vsel_count = TCS4525_NVOLTAGES; + di->vsel_count = FAN53526_NVOLTAGES; return 0; } -- GitLab From f9028dcdf589f4ab528372088623aa4e8d324df2 Mon Sep 17 00:00:00 2001 From: Peter Geis Date: Tue, 11 May 2021 17:13:34 -0400 Subject: [PATCH 0501/3804] regulator: fan53555: only bind tcs4525 to correct chip id The tcs4525 regulator has a chip id of <12>. Only allow the driver to bind to the correct chip id for safety, in accordance with the other supported devices. Signed-off-by: Peter Geis Link: https://lore.kernel.org/r/20210511211335.2935163-3-pgwipeout@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/fan53555.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c index 26f06f685b1b6..16f28f9df6a1b 100644 --- a/drivers/regulator/fan53555.c +++ b/drivers/regulator/fan53555.c @@ -89,6 +89,10 @@ enum { FAN53555_CHIP_ID_08 = 8, }; +enum { + TCS4525_CHIP_ID_12 = 12, +}; + /* IC mask revision */ enum { FAN53555_CHIP_REV_00 = 0x3, @@ -368,14 +372,21 @@ static int fan53555_voltages_setup_silergy(struct fan53555_device_info *di) static int fan53555_voltages_setup_tcs(struct fan53555_device_info *di) { - di->slew_reg = TCS4525_TIME; - di->slew_mask = TCS_SLEW_MASK; - di->slew_shift = TCS_SLEW_MASK; + switch (di->chip_id) { + case TCS4525_CHIP_ID_12: + di->slew_reg = TCS4525_TIME; + di->slew_mask = TCS_SLEW_MASK; + di->slew_shift = TCS_SLEW_MASK; - /* Init voltage range and step */ - di->vsel_min = 600000; - di->vsel_step = 6250; - di->vsel_count = FAN53526_NVOLTAGES; + /* Init voltage range and step */ + di->vsel_min = 600000; + di->vsel_step = 6250; + di->vsel_count = FAN53526_NVOLTAGES; + break; + default: + dev_err(di->dev, "Chip ID %d not supported!\n", di->chip_id); + return -EINVAL; + } return 0; } -- GitLab From b3cc8ec04f50d9c860534fe4e3617a8d10ed9ea9 Mon Sep 17 00:00:00 2001 From: Peter Geis Date: Tue, 11 May 2021 17:13:35 -0400 Subject: [PATCH 0502/3804] regulator: fan53555: fix tcs4525 function names The tcs4525 is based off the fan53526. Rename the tcs4525 functions to align with this. Signed-off-by: Peter Geis Link: https://lore.kernel.org/r/20210511211335.2935163-4-pgwipeout@gmail.com Signed-off-by: Mark Brown --- drivers/regulator/fan53555.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/regulator/fan53555.c b/drivers/regulator/fan53555.c index 16f28f9df6a1b..2695be617373c 100644 --- a/drivers/regulator/fan53555.c +++ b/drivers/regulator/fan53555.c @@ -67,7 +67,7 @@ enum fan53555_vendor { FAN53526_VENDOR_FAIRCHILD = 0, FAN53555_VENDOR_FAIRCHILD, FAN53555_VENDOR_SILERGY, - FAN53555_VENDOR_TCS, + FAN53526_VENDOR_TCS, }; enum { @@ -233,7 +233,7 @@ static int fan53555_set_ramp(struct regulator_dev *rdev, int ramp) slew_rate_t = slew_rates; slew_rate_n = ARRAY_SIZE(slew_rates); break; - case FAN53555_VENDOR_TCS: + case FAN53526_VENDOR_TCS: slew_rate_t = tcs_slew_rates; slew_rate_n = ARRAY_SIZE(tcs_slew_rates); break; @@ -370,7 +370,7 @@ static int fan53555_voltages_setup_silergy(struct fan53555_device_info *di) return 0; } -static int fan53555_voltages_setup_tcs(struct fan53555_device_info *di) +static int fan53526_voltages_setup_tcs(struct fan53555_device_info *di) { switch (di->chip_id) { case TCS4525_CHIP_ID_12: @@ -420,7 +420,7 @@ static int fan53555_device_setup(struct fan53555_device_info *di, return -EINVAL; } break; - case FAN53555_VENDOR_TCS: + case FAN53526_VENDOR_TCS: switch (pdata->sleep_vsel_id) { case FAN53555_VSEL_ID_0: di->sleep_reg = TCS4525_VSEL0; @@ -459,7 +459,7 @@ static int fan53555_device_setup(struct fan53555_device_info *di, di->mode_reg = di->vol_reg; di->mode_mask = VSEL_MODE; break; - case FAN53555_VENDOR_TCS: + case FAN53526_VENDOR_TCS: di->mode_reg = TCS4525_COMMAND; switch (pdata->sleep_vsel_id) { @@ -487,8 +487,8 @@ static int fan53555_device_setup(struct fan53555_device_info *di, case FAN53555_VENDOR_SILERGY: ret = fan53555_voltages_setup_silergy(di); break; - case FAN53555_VENDOR_TCS: - ret = fan53555_voltages_setup_tcs(di); + case FAN53526_VENDOR_TCS: + ret = fan53526_voltages_setup_tcs(di); break; default: dev_err(di->dev, "vendor %d not supported!\n", di->vendor); @@ -563,7 +563,7 @@ static const struct of_device_id __maybe_unused fan53555_dt_ids[] = { .data = (void *)FAN53555_VENDOR_SILERGY, }, { .compatible = "tcs,tcs4525", - .data = (void *)FAN53555_VENDOR_TCS + .data = (void *)FAN53526_VENDOR_TCS }, { } }; @@ -671,7 +671,7 @@ static const struct i2c_device_id fan53555_id[] = { .driver_data = FAN53555_VENDOR_SILERGY }, { .name = "tcs4525", - .driver_data = FAN53555_VENDOR_TCS + .driver_data = FAN53526_VENDOR_TCS }, { }, }; -- GitLab From dbb5afad100a828c97e012c6106566d99f041db6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 12 May 2021 15:33:08 +0200 Subject: [PATCH 0503/3804] ptrace: make ptrace() fail if the tracee changed its pid unexpectedly Suppose we have 2 threads, the group-leader L and a sub-theread T, both parked in ptrace_stop(). Debugger tries to resume both threads and does ptrace(PTRACE_CONT, T); ptrace(PTRACE_CONT, L); If the sub-thread T execs in between, the 2nd PTRACE_CONT doesn not resume the old leader L, it resumes the post-exec thread T which was actually now stopped in PTHREAD_EVENT_EXEC. In this case the PTHREAD_EVENT_EXEC event is lost, and the tracer can't know that the tracee changed its pid. This patch makes ptrace() fail in this case until debugger does wait() and consumes PTHREAD_EVENT_EXEC which reports old_pid. This affects all ptrace requests except the "asynchronous" PTRACE_INTERRUPT/KILL. The patch doesn't add the new PTRACE_ option to not complicate the API, and I _hope_ this won't cause any noticeable regression: - If debugger uses PTRACE_O_TRACEEXEC and the thread did an exec and the tracer does a ptrace request without having consumed the exec event, it's 100% sure that the thread the ptracer thinks it is targeting does not exist anymore, or isn't the same as the one it thinks it is targeting. - To some degree this patch adds nothing new. In the scenario above ptrace(L) can fail with -ESRCH if it is called after the execing sub-thread wakes the leader up and before it "steals" the leader's pid. Test-case: #include #include #include #include #include #include #include #include void *tf(void *arg) { execve("/usr/bin/true", NULL, NULL); assert(0); return NULL; } int main(void) { int leader = fork(); if (!leader) { kill(getpid(), SIGSTOP); pthread_t th; pthread_create(&th, NULL, tf, NULL); for (;;) pause(); return 0; } waitpid(leader, NULL, WSTOPPED); ptrace(PTRACE_SEIZE, leader, 0, PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXEC); waitpid(leader, NULL, 0); ptrace(PTRACE_CONT, leader, 0,0); waitpid(leader, NULL, 0); int status, thread = waitpid(-1, &status, 0); assert(thread > 0 && thread != leader); assert(status == 0x80137f); ptrace(PTRACE_CONT, thread, 0,0); /* * waitid() because waitpid(leader, &status, WNOWAIT) does not * report status. Why ???? * * Why WEXITED? because we have another kernel problem connected * to mt-exec. */ siginfo_t info; assert(waitid(P_PID, leader, &info, WSTOPPED|WEXITED|WNOWAIT) == 0); assert(info.si_pid == leader && info.si_status == 0x0405); /* OK, it sleeps in ptrace(PTRACE_EVENT_EXEC == 0x04) */ assert(ptrace(PTRACE_CONT, leader, 0,0) == -1); assert(errno == ESRCH); assert(leader == waitpid(leader, &status, WNOHANG)); assert(status == 0x04057f); assert(ptrace(PTRACE_CONT, leader, 0,0) == 0); return 0; } Signed-off-by: Oleg Nesterov Reported-by: Simon Marchi Acked-by: "Eric W. Biederman" Acked-by: Pedro Alves Acked-by: Simon Marchi Acked-by: Jan Kratochvil Signed-off-by: Linus Torvalds --- kernel/ptrace.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 76f09456ec4bc..2997ca600d186 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -170,6 +170,21 @@ void __ptrace_unlink(struct task_struct *child) spin_unlock(&child->sighand->siglock); } +static bool looks_like_a_spurious_pid(struct task_struct *task) +{ + if (task->exit_code != ((PTRACE_EVENT_EXEC << 8) | SIGTRAP)) + return false; + + if (task_pid_vnr(task) == task->ptrace_message) + return false; + /* + * The tracee changed its pid but the PTRACE_EVENT_EXEC event + * was not wait()'ed, most probably debugger targets the old + * leader which was destroyed in de_thread(). + */ + return true; +} + /* Ensure that nothing can wake it up, even SIGKILL */ static bool ptrace_freeze_traced(struct task_struct *task) { @@ -180,7 +195,8 @@ static bool ptrace_freeze_traced(struct task_struct *task) return ret; spin_lock_irq(&task->sighand->siglock); - if (task_is_traced(task) && !__fatal_signal_pending(task)) { + if (task_is_traced(task) && !looks_like_a_spurious_pid(task) && + !__fatal_signal_pending(task)) { task->state = __TASK_TRACED; ret = true; } -- GitLab From 85428beac80dbcace5b146b218697c73e367dcf5 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Wed, 12 May 2021 16:50:05 +0200 Subject: [PATCH 0504/3804] nvmet: seset ns->file when open fails Reset the ns->file value to NULL also in the error case in nvmet_file_ns_enable(). The ns->file variable points either to file object or contains the error code after the filp_open() call. This can lead to following problem: When the user first setups an invalid file backend and tries to enable the ns, it will fail. Then the user switches over to a bdev backend and enables successfully the ns. The first received I/O will crash the system because the IO backend is chosen based on the ns->file value: static u16 nvmet_parse_io_cmd(struct nvmet_req *req) { [...] if (req->ns->file) return nvmet_file_parse_io_cmd(req); return nvmet_bdev_parse_io_cmd(req); } Reported-by: Enzo Matsumiya Signed-off-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-file.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 715d4376c9979..7fdbdc496597d 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -49,9 +49,11 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) ns->file = filp_open(ns->device_path, flags, 0); if (IS_ERR(ns->file)) { - pr_err("failed to open file %s: (%ld)\n", - ns->device_path, PTR_ERR(ns->file)); - return PTR_ERR(ns->file); + ret = PTR_ERR(ns->file); + pr_err("failed to open file %s: (%d)\n", + ns->device_path, ret); + ns->file = NULL; + return ret; } ret = nvmet_file_ns_revalidate(ns); -- GitLab From 4819d16d91145966ce03818a95169df1fd56b299 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 21 Apr 2021 18:33:58 +0300 Subject: [PATCH 0505/3804] drm/i915: Avoid div-by-zero on gen2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gen2 tiles are 2KiB in size so i915_gem_object_get_tile_row_size() can in fact return <4KiB, which leads to div-by-zero here. Avoid that. Not sure i915_gem_object_get_tile_row_size() is entirely sane anyway since it doesn't account for the different tile layouts on i8xx/i915... I'm not able to hit this before commit 6846895fde05 ("drm/i915: Replace PIN_NONFAULT with calls to PIN_NOEVICT") and it looks like I also need to run recent version of Mesa. With those in place xonotic trips on this quite easily on my 85x. Cc: stable@vger.kernel.org Reviewed-by: Chris Wilson Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210421153401.13847-2-ville.syrjala@linux.intel.com (cherry picked from commit ed52c62d386f764194e0184fdb905d5f24194cae) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index 23f6b00e08e21..f6fe5cb014382 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -189,7 +189,7 @@ compute_partial_view(const struct drm_i915_gem_object *obj, struct i915_ggtt_view view; if (i915_gem_object_is_tiled(obj)) - chunk = roundup(chunk, tile_row_pages(obj)); + chunk = roundup(chunk, tile_row_pages(obj) ?: 1); view.type = I915_GGTT_VIEW_PARTIAL; view.partial.offset = rounddown(page_offset, chunk); -- GitLab From 04d019961fd15de92874575536310243a0d4c5c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Wed, 21 Apr 2021 18:33:59 +0300 Subject: [PATCH 0506/3804] drm/i915: Read C0DRB3/C1DRB3 as 16 bits again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We've defined C0DRB3/C1DRB3 as 16 bit registers, so access them as such. Fixes: 1c8242c3a4b2 ("drm/i915: Use unchecked writes for setting up the fences") Reviewed-by: Chris Wilson Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210421153401.13847-3-ville.syrjala@linux.intel.com (cherry picked from commit f765a5b48c667bdada5e49d5e0f23f8c0687b21b) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index e72b7a0dc316e..8a322594210c4 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -653,8 +653,8 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt) * banks of memory are paired and unswizzled on the * uneven portion, so leave that as unknown. */ - if (intel_uncore_read(uncore, C0DRB3) == - intel_uncore_read(uncore, C1DRB3)) { + if (intel_uncore_read16(uncore, C0DRB3) == + intel_uncore_read16(uncore, C1DRB3)) { swizzle_x = I915_BIT_6_SWIZZLE_9_10; swizzle_y = I915_BIT_6_SWIZZLE_9; } -- GitLab From ea995218dddba171fecd05496c69617c5ef3c5b8 Mon Sep 17 00:00:00 2001 From: Lv Yunlong Date: Mon, 26 Apr 2021 05:43:40 -0700 Subject: [PATCH 0507/3804] drm/i915/gt: Fix a double free in gen8_preallocate_top_level_pdp Our code analyzer reported a double free bug. In gen8_preallocate_top_level_pdp, pde and pde->pt.base are allocated via alloc_pd(vm) with one reference. If pin_pt_dma() failed, pde->pt.base is freed by i915_gem_object_put() with a reference dropped. Then free_pd calls free_px() defined in intel_ppgtt.c, which calls i915_gem_object_put() to put pde->pt.base again. As pde->pt.base is protected by refcount, so the second put will not free pde->pt.base actually. But, maybe it is better to remove the first put? Fixes: 82adf901138cc ("drm/i915/gt: Shrink i915_page_directory's slab bucket") Signed-off-by: Lv Yunlong Reviewed-by: Matthew Auld Signed-off-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20210426124340.4238-1-lyl2019@mail.ustc.edu.cn (cherry picked from commit ac69496fe65cca0611d5917b7d232730ff605bc7) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c index 176c19633412f..74bf6fc8461fe 100644 --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c @@ -641,7 +641,6 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt) err = pin_pt_dma(vm, pde->pt.base); if (err) { - i915_gem_object_put(pde->pt.base); free_pd(vm, pde); return err; } -- GitLab From 402be8a101190969fc7ff122d07e262df86e132b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Marchesin?= Date: Thu, 29 Apr 2021 03:10:21 +0000 Subject: [PATCH 0508/3804] drm/i915: Fix crash in auto_retire MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The retire logic uses the 2 lower bits of the pointer to the retire function to store flags. However, the auto_retire function is not guaranteed to be aligned to a multiple of 4, which causes crashes as we jump to the wrong address, for example like this: 2021-04-24T18:03:53.804300Z WARNING kernel: [ 516.876901] invalid opcode: 0000 [#1] PREEMPT SMP NOPTI 2021-04-24T18:03:53.804310Z WARNING kernel: [ 516.876906] CPU: 7 PID: 146 Comm: kworker/u16:6 Tainted: G U 5.4.105-13595-g3cd84167b2df #1 2021-04-24T18:03:53.804311Z WARNING kernel: [ 516.876907] Hardware name: Google Volteer2/Volteer2, BIOS Google_Volteer2.13672.76.0 02/22/2021 2021-04-24T18:03:53.804312Z WARNING kernel: [ 516.876911] Workqueue: events_unbound active_work 2021-04-24T18:03:53.804313Z WARNING kernel: [ 516.876914] RIP: 0010:auto_retire+0x1/0x20 2021-04-24T18:03:53.804314Z WARNING kernel: [ 516.876916] Code: e8 01 f2 ff ff eb 02 31 db 48 89 d8 5b 5d c3 0f 1f 44 00 00 55 48 89 e5 f0 ff 87 c8 00 00 00 0f 88 ab 47 4a 00 31 c0 5d c3 0f <1f> 44 00 00 55 48 89 e5 f0 ff 8f c8 00 00 00 0f 88 9a 47 4a 00 74 2021-04-24T18:03:53.804319Z WARNING kernel: [ 516.876918] RSP: 0018:ffff9b4d809fbe38 EFLAGS: 00010286 2021-04-24T18:03:53.804320Z WARNING kernel: [ 516.876919] RAX: 0000000000000007 RBX: ffff927915079600 RCX: 0000000000000007 2021-04-24T18:03:53.804320Z WARNING kernel: [ 516.876921] RDX: ffff9b4d809fbe40 RSI: 0000000000000286 RDI: ffff927915079600 2021-04-24T18:03:53.804321Z WARNING kernel: [ 516.876922] RBP: ffff9b4d809fbe68 R08: 8080808080808080 R09: fefefefefefefeff 2021-04-24T18:03:53.804321Z WARNING kernel: [ 516.876924] R10: 0000000000000010 R11: ffffffff92e44bd8 R12: ffff9279150796a0 2021-04-24T18:03:53.804322Z WARNING kernel: [ 516.876925] R13: ffff92791c368180 R14: ffff927915079640 R15: 000000001c867605 2021-04-24T18:03:53.804323Z WARNING kernel: [ 516.876926] FS: 0000000000000000(0000) GS:ffff92791ffc0000(0000) knlGS:0000000000000000 2021-04-24T18:03:53.804323Z WARNING kernel: [ 516.876928] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 2021-04-24T18:03:53.804324Z WARNING kernel: [ 516.876929] CR2: 0000239514955000 CR3: 00000007f82da001 CR4: 0000000000760ee0 2021-04-24T18:03:53.804325Z WARNING kernel: [ 516.876930] PKRU: 55555554 2021-04-24T18:03:53.804325Z WARNING kernel: [ 516.876931] Call Trace: 2021-04-24T18:03:53.804326Z WARNING kernel: [ 516.876935] __active_retire+0x77/0xcf 2021-04-24T18:03:53.804326Z WARNING kernel: [ 516.876939] process_one_work+0x1da/0x394 2021-04-24T18:03:53.804327Z WARNING kernel: [ 516.876941] worker_thread+0x216/0x375 2021-04-24T18:03:53.804327Z WARNING kernel: [ 516.876944] kthread+0x147/0x156 2021-04-24T18:03:53.804335Z WARNING kernel: [ 516.876946] ? pr_cont_work+0x58/0x58 2021-04-24T18:03:53.804335Z WARNING kernel: [ 516.876948] ? kthread_blkcg+0x2e/0x2e 2021-04-24T18:03:53.804336Z WARNING kernel: [ 516.876950] ret_from_fork+0x1f/0x40 2021-04-24T18:03:53.804336Z WARNING kernel: [ 516.876952] Modules linked in: cdc_mbim cdc_ncm cdc_wdm xt_cgroup rfcomm cmac algif_hash algif_skcipher af_alg xt_MASQUERADE uinput snd_soc_rt5682_sdw snd_soc_rt5682 snd_soc_max98373_sdw snd_soc_max98373 snd_soc_rl6231 regmap_sdw snd_soc_sof_sdw snd_soc_hdac_hdmi snd_soc_dmic snd_hda_codec_hdmi snd_sof_pci snd_sof_intel_hda_common intel_ipu6_psys snd_sof_xtensa_dsp soundwire_intel soundwire_generic_allocation soundwire_cadence snd_sof_intel_hda snd_sof snd_soc_hdac_hda snd_soc_acpi_intel_match snd_soc_acpi snd_hda_ext_core soundwire_bus snd_hda_intel snd_intel_dspcfg snd_hda_codec snd_hwdep snd_hda_core intel_ipu6_isys videobuf2_dma_contig videobuf2_v4l2 videobuf2_common videobuf2_memops mei_hdcp intel_ipu6 ov2740 ov8856 at24 sx9310 dw9768 v4l2_fwnode cros_ec_typec intel_pmc_mux roles acpi_als typec fuse iio_trig_sysfs cros_ec_light_prox cros_ec_lid_angle cros_ec_sensors cros_ec_sensors_core industrialio_triggered_buffer cros_ec_sensors_ring kfifo_buf industrialio cros_ec_sensorhub 2021-04-24T18:03:53.804337Z WARNING kernel: [ 516.876972] cdc_ether usbnet iwlmvm lzo_rle lzo_compress iwl7000_mac80211 iwlwifi zram cfg80211 r8152 mii btusb btrtl btintel btbcm bluetooth ecdh_generic ecc joydev 2021-04-24T18:03:53.804337Z EMERG kernel: [ 516.879169] gsmi: Log Shutdown Reason 0x03 This change fixes this by aligning the function. Signed-off-by: Stéphane Marchesin Fixes: 229007e02d69 ("drm/i915: Wrap i915_active in a simple kreffed struct") Signed-off-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20210429031021.1218091-1-marcheu@chromium.org (cherry picked from commit ca419f407b43cc89942ebc297c7a63d94abbcae4) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_active.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index cf9a3d384971f..aa573b078ae75 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -1156,7 +1156,8 @@ static int auto_active(struct i915_active *ref) return 0; } -static void auto_retire(struct i915_active *ref) +__i915_active_call static void +auto_retire(struct i915_active *ref) { i915_active_put(ref); } -- GitLab From a915fe5e9601c632417ef5261af70788d7d23a8a Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 29 Apr 2021 09:35:29 +0100 Subject: [PATCH 0509/3804] drm/i915/overlay: Fix active retire callback alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit __i915_active_call annotation is required on the retire callback to ensure correct function alignment. Signed-off-by: Tvrtko Ursulin Fixes: a21ce8ad12d2 ("drm/i915/overlay: Switch to using i915_active tracking") Cc: Chris Wilson Cc: Matthew Auld Reviewed-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20210429083530.849546-1-tvrtko.ursulin@linux.intel.com (cherry picked from commit d8e44e4dd221ee283ea60a6fb87bca08807aa0ab) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_overlay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index e5dadde422f74..bbaf05515e883 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -383,7 +383,7 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay) i830_overlay_clock_gating(dev_priv, true); } -static void +__i915_active_call static void intel_overlay_last_flip_retire(struct i915_active *active) { struct intel_overlay *overlay = -- GitLab From e4527420ed087f99c6aa2ac22c6d3458c7dc1a94 Mon Sep 17 00:00:00 2001 From: Ankit Nautiyal Date: Tue, 11 May 2021 17:39:30 +0530 Subject: [PATCH 0510/3804] drm/i915: Use correct downstream caps for check Src-Ctl mode for PCON Fix the typo in DPCD caps used for checking SRC CTL mode of HDMI2.1 PCON v2: Corrected Fixes tag (Jani Nikula). v3: Rebased. Fixes: 04b6603d13be ("drm/i915/display: Configure HDMI2.1 Pcon for FRL only if Src-Ctl mode is available") Cc: Ankit Nautiyal Cc: Uma Shankar Cc: Jani Nikula Cc: "Ville Syrj_l_" Cc: Imre Deak Cc: Manasi Navare Cc: Gwan-gyeong Mun Cc: Lucas De Marchi Cc: Sean Paul Signed-off-by: Ankit Nautiyal Reviewed-by: Swati Sharma Signed-off-by: Anshuman Gupta Link: https://patchwork.freedesktop.org/patch/msgid/20210511120930.12218-1-ankit.k.nautiyal@intel.com (cherry picked from commit 88a9c5485c48ab60c89612a17fc89f4162bbdb9d) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/display/intel_dp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 1e026177ed1ba..642c60f3d9b18 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -2111,7 +2111,7 @@ void intel_dp_check_frl_training(struct intel_dp *intel_dp) * -PCON supports SRC_CTL_MODE (VESA DP2.0-HDMI2.1 PCON Spec Draft-1 Sec-7) * -sink is HDMI2.1 */ - if (!(intel_dp->dpcd[2] & DP_PCON_SOURCE_CTL_MODE) || + if (!(intel_dp->downstream_ports[2] & DP_PCON_SOURCE_CTL_MODE) || !intel_dp_is_hdmi_2_1_sink(intel_dp) || intel_dp->frl.is_trained) return; -- GitLab From cc00c1988801dc71f63bb7bad019e85046865095 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 12 May 2021 19:51:31 +0200 Subject: [PATCH 0511/3804] sched: Fix leftover comment typos A few more snuck in. Also capitalize 'CPU' while at it. Signed-off-by: Ingo Molnar --- include/linux/sched_clock.h | 2 +- kernel/sched/core.c | 4 ++-- kernel/sched/fair.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/sched_clock.h b/include/linux/sched_clock.h index 528718e4ed528..835ee87ed7922 100644 --- a/include/linux/sched_clock.h +++ b/include/linux/sched_clock.h @@ -14,7 +14,7 @@ * @sched_clock_mask: Bitmask for two's complement subtraction of non 64bit * clocks. * @read_sched_clock: Current clock source (or dummy source when suspended). - * @mult: Multipler for scaled math conversion. + * @mult: Multiplier for scaled math conversion. * @shift: Shift value for scaled math conversion. * * Care must be taken when updating this structure; it is read by diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9d00f4958bde7..ac8882da5daf0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5506,7 +5506,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) } /* - * Try and select tasks for each sibling in decending sched_class + * Try and select tasks for each sibling in descending sched_class * order. */ for_each_class(class) { @@ -5520,7 +5520,7 @@ again: /* * If this sibling doesn't yet have a suitable task to - * run; ask for the most elegible task, given the + * run; ask for the most eligible task, given the * highest priority task already selected for this * core. */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2635e10484213..161b92aa1c797 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -10808,11 +10808,11 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr) * sched_slice() considers only this active rq and it gets the * whole slice. But during force idle, we have siblings acting * like a single runqueue and hence we need to consider runnable - * tasks on this cpu and the forced idle cpu. Ideally, we should + * tasks on this CPU and the forced idle CPU. Ideally, we should * go through the forced idle rq, but that would be a perf hit. - * We can assume that the forced idle cpu has atleast + * We can assume that the forced idle CPU has at least * MIN_NR_TASKS_DURING_FORCEIDLE - 1 tasks and use that to check - * if we need to give up the cpu. + * if we need to give up the CPU. */ if (rq->core->core_forceidle && rq->cfs.nr_running == 1 && __entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE)) -- GitLab From c43426334b3169b6c9e6855483aa7384ff09fd33 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 12 May 2021 19:58:31 +0200 Subject: [PATCH 0512/3804] x86: Fix leftover comment typos Signed-off-by: Ingo Molnar --- arch/x86/hyperv/hv_init.c | 2 +- arch/x86/include/asm/sgx.h | 2 +- arch/x86/include/asm/stackprotector.h | 2 +- arch/x86/kernel/kprobes/core.c | 2 +- arch/x86/kvm/mmu/mmu.c | 2 +- arch/x86/kvm/mmu/tdp_mmu.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index bb0ae4b5c00f1..256ad0e34dd23 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -623,7 +623,7 @@ bool hv_query_ext_cap(u64 cap_query) * output parameter to the hypercall below and so it should be * compatible with 'virt_to_phys'. Which means, it's address should be * directly mapped. Use 'static' to keep it compatible; stack variables - * can be virtually mapped, making them imcompatible with + * can be virtually mapped, making them incompatible with * 'virt_to_phys'. * Hypercall input/output addresses should also be 8-byte aligned. */ diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h index 9c31e0ebc55b1..05f3e21f01a74 100644 --- a/arch/x86/include/asm/sgx.h +++ b/arch/x86/include/asm/sgx.h @@ -13,7 +13,7 @@ /* * This file contains both data structures defined by SGX architecture and Linux * defined software data structures and functions. The two should not be mixed - * together for better readibility. The architectural definitions come first. + * together for better readability. The architectural definitions come first. */ /* The SGX specific CPUID function. */ diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index b6ffe58c70fab..24a8d6c4fb185 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -11,7 +11,7 @@ * The same segment is shared by percpu area and stack canary. On * x86_64, percpu symbols are zero based and %gs (64-bit) points to the * base of percpu area. The first occupant of the percpu area is always - * fixed_percpu_data which contains stack_canary at the approproate + * fixed_percpu_data which contains stack_canary at the appropriate * offset. On x86_32, the stack canary is just a regular percpu * variable. * diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index d3d65545cb8b7..7c4d0736a9987 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -674,7 +674,7 @@ static int prepare_emulation(struct kprobe *p, struct insn *insn) break; if (insn->addr_bytes != sizeof(unsigned long)) - return -EOPNOTSUPP; /* Don't support differnt size */ + return -EOPNOTSUPP; /* Don't support different size */ if (X86_MODRM_MOD(opcode) != 3) return -EOPNOTSUPP; /* TODO: support memory addressing */ diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 0144c40d09c76..5e60b00e8e500 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2374,7 +2374,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu) * page is available, while the caller may end up allocating as many as * four pages, e.g. for PAE roots or for 5-level paging. Temporarily * exceeding the (arbitrary by default) limit will not harm the host, - * being too agressive may unnecessarily kill the guest, and getting an + * being too aggressive may unnecessarily kill the guest, and getting an * exact count is far more trouble than it's worth, especially in the * page fault paths. */ diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 95eeb5ac6a8a7..e09cb1f978009 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1017,7 +1017,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, if (!is_shadow_present_pte(iter.old_spte)) { /* - * If SPTE has been forzen by another thread, just + * If SPTE has been frozen by another thread, just * give up and retry, avoiding unnecessary page table * allocation and free. */ -- GitLab From 93d0955e6cf562d02aae37f5f8d98d9d9d16e0d4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 12 May 2021 20:04:28 +0200 Subject: [PATCH 0513/3804] locking: Fix comment typos A few snuck through. Signed-off-by: Ingo Molnar --- include/linux/lockdep_types.h | 2 +- kernel/futex.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h index 2ec9ff5a7fff0..3e726ace5c621 100644 --- a/include/linux/lockdep_types.h +++ b/include/linux/lockdep_types.h @@ -52,7 +52,7 @@ enum lockdep_lock_type { * NR_LOCKDEP_CACHING_CLASSES ... Number of classes * cached in the instance of lockdep_map * - * Currently main class (subclass == 0) and signle depth subclass + * Currently main class (subclass == 0) and single depth subclass * are cached in lockdep_map. This optimization is mainly targeting * on rq->lock. double_rq_lock() acquires this highly competitive with * single depth. diff --git a/kernel/futex.c b/kernel/futex.c index 4938a00bc7857..2f386f0129001 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1874,7 +1874,7 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1, * If the caller intends to requeue more than 1 waiter to pifutex, * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now, * as we have means to handle the possible fault. If not, don't set - * the bit unecessarily as it will force the subsequent unlock to enter + * the bit unnecessarily as it will force the subsequent unlock to enter * the kernel. */ top_waiter = futex_top_waiter(hb1, key1); @@ -2103,7 +2103,7 @@ retry_private: continue; /* - * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always + * FUTEX_WAIT_REQUEUE_PI and FUTEX_CMP_REQUEUE_PI should always * be paired with each other and no other futex ops. * * We should never be requeueing a futex_q with a pi_state, @@ -2318,7 +2318,7 @@ retry: } /* - * PI futexes can not be requeued and must remove themself from the + * PI futexes can not be requeued and must remove themselves from the * hash bucket. The hash bucket lock (i.e. lock_ptr) is held. */ static void unqueue_me_pi(struct futex_q *q) @@ -2903,7 +2903,7 @@ no_block: */ res = fixup_owner(uaddr, &q, !ret); /* - * If fixup_owner() returned an error, proprogate that. If it acquired + * If fixup_owner() returned an error, propagate that. If it acquired * the lock, clear our -ETIMEDOUT or -EINTR. */ if (res) @@ -3280,7 +3280,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, */ res = fixup_owner(uaddr2, &q, !ret); /* - * If fixup_owner() returned an error, proprogate that. If it + * If fixup_owner() returned an error, propagate that. If it * acquired the lock, clear -ETIMEDOUT or -EINTR. */ if (res) @@ -3678,7 +3678,7 @@ void futex_exec_release(struct task_struct *tsk) { /* * The state handling is done for consistency, but in the case of - * exec() there is no way to prevent futher damage as the PID stays + * exec() there is no way to prevent further damage as the PID stays * the same. But for the unlikely and arguably buggy case that a * futex is held on exec(), this provides at least as much state * consistency protection which is possible. -- GitLab From ca0760e7d79e2bb9c342e6b3f925b1ef01c6303e Mon Sep 17 00:00:00 2001 From: Wei Ming Chen Date: Thu, 6 May 2021 20:30:51 +0800 Subject: [PATCH 0514/3804] Compiler Attributes: Add continue in comment Add "continue;" for switch/case block according to Doc[1] [1] https://www.kernel.org/doc/html/latest/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Signed-off-by: Wei Ming Chen Signed-off-by: Miguel Ojeda --- include/linux/compiler_attributes.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index c043b8d2b17bf..183ddd5fd0724 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -199,6 +199,7 @@ * must end with any of these keywords: * break; * fallthrough; + * continue; * goto