diff --git a/.mailmap b/.mailmap index b157f88ce26a41d2bc3307729fd4db018e8f63c7..8cd44b0c657970486f3c3e27f20d36a4fc7bc9b9 100644 --- a/.mailmap +++ b/.mailmap @@ -70,6 +70,7 @@ Boris Brezillon Boris Brezillon Brian Avery Brian King +Brian Silverman Changbin Du Changbin Du Chao Yu @@ -79,6 +80,9 @@ Chris Chiu Christian Borntraeger Christian Borntraeger Christian Borntraeger +Christian Brauner +Christian Brauner +Christian Brauner Christophe Ricard Christoph Hellwig Colin Ian King diff --git a/Documentation/ABI/testing/sysfs-driver-aspeed-uart-routing b/Documentation/ABI/testing/sysfs-driver-aspeed-uart-routing index b363827da4375d207fd0bb19d51bd89d922e3bed..910df0e5815abfade18e13eb8132002a90fa6e40 100644 --- a/Documentation/ABI/testing/sysfs-driver-aspeed-uart-routing +++ b/Documentation/ABI/testing/sysfs-driver-aspeed-uart-routing @@ -1,4 +1,4 @@ -What: /sys/bus/platform/drivers/aspeed-uart-routing/*/uart* +What: /sys/bus/platform/drivers/aspeed-uart-routing/\*/uart\* Date: September 2021 Contact: Oskar Senft Chia-Wei Wang @@ -9,7 +9,7 @@ Description: Selects the RX source of the UARTx device. depends on the selected file. e.g. - cat /sys/bus/platform/drivers/aspeed-uart-routing/*.uart_routing/uart1 + cat /sys/bus/platform/drivers/aspeed-uart-routing/\*.uart_routing/uart1 [io1] io2 io3 io4 uart2 uart3 uart4 io6 In this case, UART1 gets its input from IO1 (physical serial port 1). @@ -17,7 +17,7 @@ Description: Selects the RX source of the UARTx device. Users: OpenBMC. Proposed changes should be mailed to openbmc@lists.ozlabs.org -What: /sys/bus/platform/drivers/aspeed-uart-routing/*/io* +What: /sys/bus/platform/drivers/aspeed-uart-routing/\*/io\* Date: September 2021 Contact: Oskar Senft Chia-Wei Wang diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst index f2b3439edcc2cc772c7f92cd3510060df6b5e8b8..860fe651d6453eed1652f721cb048c8b3840be33 100644 --- a/Documentation/accounting/psi.rst +++ b/Documentation/accounting/psi.rst @@ -92,7 +92,8 @@ Triggers can be set on more than one psi metric and more than one trigger for the same psi metric can be specified. However for each trigger a separate file descriptor is required to be able to poll it separately from others, therefore for each trigger a separate open() syscall should be made even -when opening the same psi interface file. +when opening the same psi interface file. Write operations to a file descriptor +with an already existing psi trigger will fail with EBUSY. Monitors activate only when system enters stall state for the monitored psi metric and deactivates upon exit from the stall state. While system is diff --git a/Documentation/admin-guide/gpio/index.rst b/Documentation/admin-guide/gpio/index.rst index 7db367572f303264a654a33f35f17e97ff6ec73f..f6861ca16ffe6bfc93a4997387a5a34c8aa0c751 100644 --- a/Documentation/admin-guide/gpio/index.rst +++ b/Documentation/admin-guide/gpio/index.rst @@ -10,6 +10,7 @@ gpio gpio-aggregator sysfs gpio-mockup + gpio-sim .. 
only:: subproject and html diff --git a/Documentation/arm/marvell.rst b/Documentation/arm/marvell.rst index 9485a5a2e2e970aec59d0ec81e07090322b3fcec..2f41caa0096c3c17d52e49f6f241e3f9b391e6d1 100644 --- a/Documentation/arm/marvell.rst +++ b/Documentation/arm/marvell.rst @@ -266,10 +266,12 @@ Avanta family ------------- Flavors: + - 88F6500 - 88F6510 - 88F6530P - 88F6550 - 88F6560 + - 88F6601 Homepage: https://web.archive.org/web/20181005145041/http://www.marvell.com/broadband/ diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 5342e895fb604bfdfc0bead2e30283e5aec2075f..ea281dd755171d5c61a140997839c6846ac52f01 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -52,6 +52,12 @@ stable kernels. | Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2038923 | ARM64_ERRATUM_2038923 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #1902691 | ARM64_ERRATUM_1902691 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #827319 | ARM64_ERRATUM_827319 | @@ -92,12 +98,20 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #2077057 | ARM64_ERRATUM_2077057 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2224489 | ARM64_ERRATUM_2224489 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | diff --git a/Documentation/dev-tools/kselftest.rst b/Documentation/dev-tools/kselftest.rst index dcefee707ccdc35fbdc800ee18bbf373e2884d19..a833ecf12fbc1dde8b10e7ed6bd78440797ccb1e 100644 --- a/Documentation/dev-tools/kselftest.rst +++ b/Documentation/dev-tools/kselftest.rst @@ -7,6 +7,14 @@ directory. These are intended to be small tests to exercise individual code paths in the kernel. 
Tests are intended to be run after building, installing and booting a kernel. +Kselftest from mainline can be run on older stable kernels. Running tests +from mainline offers the best coverage. Several test rings run the mainline +kselftest suite on stable releases. The reason is that when a new test gets +added to regression-test a bug in existing code, we should be able to run +that test on an older kernel. Hence, it is important to keep the test code +able to run on older kernels, and to make sure it skips gracefully on newer +releases. + You can find additional information on the Kselftest framework, and on how to write new tests using the framework, on the Kselftest wiki: diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst index 76af931a332cda24e9c9afc354b26e45160a1060..1c83e7d60a8a973dc1f679cce1f280e7f969aba1 100644 --- a/Documentation/dev-tools/kunit/usage.rst +++ b/Documentation/dev-tools/kunit/usage.rst @@ -242,7 +242,7 @@ example: int rectangle_area(struct shape *this) { - struct rectangle *self = container_of(this, struct shape, parent); + struct rectangle *self = container_of(this, struct rectangle, parent); return self->length * self->width; }; diff --git a/Documentation/devicetree/bindings/arm/omap/omap.txt b/Documentation/devicetree/bindings/arm/omap/omap.txt index e77635c5422c6a03995bc45e02049d6b44e4b190..fa8b31660cadd1e2cfd0c7cc8c19bb971be3808e 100644 --- a/Documentation/devicetree/bindings/arm/omap/omap.txt +++ b/Documentation/devicetree/bindings/arm/omap/omap.txt @@ -119,6 +119,9 @@ Boards (incomplete list of examples): - OMAP3 BeagleBoard : Low cost community board compatible = "ti,omap3-beagle", "ti,omap3430", "ti,omap3" +- OMAP3 BeagleBoard A to B4 : Early BeagleBoard revisions A to B4 with a timer quirk + compatible = "ti,omap3-beagle-ab4", "ti,omap3-beagle", "ti,omap3430", "ti,omap3" + - OMAP3 Tobi with Overo : Commercial expansion board with daughter board compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap3430", "ti,omap3" diff --git a/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml b/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml index 8e13f27b28edc0c64684c883869ba4885ee479fd..bce96b5b0db083463eed3dba24dc6b26b99b025f 100644 --- a/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml +++ b/Documentation/devicetree/bindings/display/bridge/analogix,anx7814.yaml @@ -7,7 +7,9 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Analogix ANX7814 SlimPort (Full-HD Transmitter) maintainers: - - Enric Balletbo i Serra + - Andrzej Hajda + - Neil Armstrong + - Robert Foss properties: compatible: diff --git a/Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml b/Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml index 9f7cc6b757cbbbca02f5d56f8b2572f8665b73fc..a88a5d8c7ba51618aae809a311442b6b58e64e36 100644 --- a/Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml +++ b/Documentation/devicetree/bindings/display/bridge/google,cros-ec-anx7688.yaml @@ -8,7 +8,6 @@ title: ChromeOS EC ANX7688 HDMI to DP Converter through Type-C Port maintainers: - Nicolas Boichat - - Enric Balletbo i Serra description: | ChromeOS EC ANX7688 is a display bridge that converts HDMI 2.0 to diff --git a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml index
cdaf7a7a8f8867e4e13ed4238e2a196300020c08..186e17be51fb21a3acfd810efe8ebd6c38cbe63f 100644 --- a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml +++ b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml @@ -8,7 +8,6 @@ title: MIPI DSI to eDP Video Format Converter Device Tree Bindings maintainers: - Nicolas Boichat - - Enric Balletbo i Serra description: | The PS8640 is a low power MIPI-to-eDP video format converter supporting diff --git a/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml b/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml index a108029ecfabc4b054fbe1535e8db31ff59c7579..acd2f3faa6b9b0616111301f94975ecc1283bf62 100644 --- a/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml +++ b/Documentation/devicetree/bindings/display/panel/abt,y030xx067a.yaml @@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Asia Better Technology 3.0" (320x480 pixels) 24-bit IPS LCD panel -description: | - The panel must obey the rules for a SPI slave device as specified in - spi/spi-controller.yaml - maintainers: - Paul Cercueil allOf: - $ref: panel-common.yaml# + - $ref: /schemas/spi/spi-peripheral-props.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml index e89c1ea62ffa9f7a5db74b5e54a6aa9857efeb19..7d221ef35443420cf9bd4bc6c3a561f0f83d34f6 100644 --- a/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml +++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9322.yaml @@ -15,11 +15,9 @@ description: | 960 TFT source driver pins and 240 TFT gate driver pins, VCOM, VCOML and VCOMH outputs. - The panel must obey the rules for a SPI slave device as specified in - spi/spi-controller.yaml - allOf: - $ref: panel-common.yaml# + - $ref: /schemas/spi/spi-peripheral-props.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml b/Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml index cda36c04e85cfba7852328bb3c6cc6733c79e8b2..72788e3e6c59064423eff61faa5bb58599291303 100644 --- a/Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml +++ b/Documentation/devicetree/bindings/display/panel/innolux,ej030na.yaml @@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Innolux EJ030NA 3.0" (320x480 pixels) 24-bit TFT LCD panel -description: | - The panel must obey the rules for a SPI slave device as specified in - spi/spi-controller.yaml - maintainers: - Paul Cercueil allOf: - $ref: panel-common.yaml# + - $ref: /schemas/spi/spi-peripheral-props.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml b/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml index c45c92a3d41fcd1801cf47cb52b3563a5f03948f..2a2756d196810e6489ef2cd912d0fc361ee61fd2 100644 --- a/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml +++ b/Documentation/devicetree/bindings/display/panel/kingdisplay,kd035g6-54nt.yaml @@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: King Display KD035G6-54NT 3.5" (320x240 pixels) 24-bit TFT LCD panel -description: | - The panel must obey the rules for a SPI slave device as specified in - spi/spi-controller.yaml - maintainers: - Paul Cercueil allOf: - $ref: panel-common.yaml# + - $ref: 
/schemas/spi/spi-peripheral-props.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml b/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml index 830e335ddb532e4530ae8215e7a1a8e2903d7d26..5e4e0e552c2f893af73df20590560123573cd24e 100644 --- a/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml +++ b/Documentation/devicetree/bindings/display/panel/lgphilips,lb035q02.yaml @@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: LG.Philips LB035Q02 Panel -description: | - The panel must obey the rules for a SPI slave device as specified in - spi/spi-controller.yaml - maintainers: - Tomi Valkeinen allOf: - $ref: panel-common.yaml# + - $ref: /schemas/spi/spi-peripheral-props.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml b/Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml index 060ee27a4749ef49134ee718f809fa7b2e8bc842..d525165d6d6314a15b2b5a08c564ecd95224a1eb 100644 --- a/Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml +++ b/Documentation/devicetree/bindings/display/panel/samsung,ld9040.yaml @@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Samsung LD9040 AMOLED LCD parallel RGB panel with SPI control bus -description: | - The panel must obey the rules for a SPI slave device as specified in - spi/spi-controller.yaml - maintainers: - Andrzej Hajda allOf: - $ref: panel-common.yaml# + - $ref: /schemas/spi/spi-peripheral-props.yaml# properties: compatible: @@ -63,8 +60,6 @@ examples: lcd@0 { compatible = "samsung,ld9040"; - #address-cells = <1>; - #size-cells = <0>; reg = <0>; vdd3-supply = <&ldo7_reg>; diff --git a/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml b/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml index ea58df49263ac3bd08bc55a42a2fd1c374ad9b70..940f7f88526ff9796a9540f566609781e6248722 100644 --- a/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml +++ b/Documentation/devicetree/bindings/display/panel/samsung,s6e63m0.yaml @@ -12,6 +12,7 @@ maintainers: allOf: - $ref: panel-common.yaml# - $ref: /schemas/leds/backlight/common.yaml# + - $ref: /schemas/spi/spi-peripheral-props.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml b/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml index fa46d151e7b3f0a2b46275449811112544aa8ac4..9e1d707c2ace18da7d26ae834740e56703405f49 100644 --- a/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml +++ b/Documentation/devicetree/bindings/display/panel/sitronix,st7789v.yaml @@ -6,15 +6,12 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Sitronix ST7789V RGB panel with SPI control bus -description: | - The panel must obey the rules for a SPI slave device as specified in - spi/spi-controller.yaml - maintainers: - Maxime Ripard allOf: - $ref: panel-common.yaml# + - $ref: /schemas/spi/spi-peripheral-props.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml b/Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml index 95d053c548abab467e5e234297e659b01af0cce5..98abdf4ddeac605aeb1dc18d15b60a7a7beb372c 100644 --- a/Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml +++ b/Documentation/devicetree/bindings/display/panel/sony,acx565akm.yaml @@ -6,15 +6,12 
@@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Sony ACX565AKM SDI Panel -description: | - The panel must obey the rules for a SPI slave device as specified in - spi/spi-controller.yaml - maintainers: - Tomi Valkeinen allOf: - $ref: panel-common.yaml# + - $ref: /schemas/spi/spi-peripheral-props.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/display/panel/tpo,td.yaml b/Documentation/devicetree/bindings/display/panel/tpo,td.yaml index 4aa605613445db9001529012d2cfc85d3066dd2b..f902a9d741414baf52a1cfdaec0b46475242451a 100644 --- a/Documentation/devicetree/bindings/display/panel/tpo,td.yaml +++ b/Documentation/devicetree/bindings/display/panel/tpo,td.yaml @@ -6,16 +6,13 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Toppoly TD Panels -description: | - The panel must obey the rules for a SPI slave device as specified in - spi/spi-controller.yaml - maintainers: - Marek Belisko - H. Nikolaus Schaller allOf: - $ref: panel-common.yaml# + - $ref: /schemas/spi/spi-peripheral-props.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml b/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml index 008c144257cbb0cc2e2e3933f987eff7f81b390c..1a68a940d165eeb2adc96ad5388e6e42c3d4c20c 100644 --- a/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml +++ b/Documentation/devicetree/bindings/display/rockchip/rockchip,rk3066-hdmi.yaml @@ -26,14 +26,6 @@ properties: clock-names: const: hclk - pinctrl-0: - maxItems: 2 - - pinctrl-names: - const: default - description: - Switch the iomux for the HPD/I2C pins to HDMI function. - power-domains: maxItems: 1 diff --git a/Documentation/devicetree/bindings/extcon/extcon-usbc-cros-ec.yaml b/Documentation/devicetree/bindings/extcon/extcon-usbc-cros-ec.yaml index 20e1ccfc8630b0fe9fc8366d44a42a1780e9963c..2d82b44268dbd7dbb38da6baf6abd1011f268fab 100644 --- a/Documentation/devicetree/bindings/extcon/extcon-usbc-cros-ec.yaml +++ b/Documentation/devicetree/bindings/extcon/extcon-usbc-cros-ec.yaml @@ -8,7 +8,6 @@ title: ChromeOS EC USB Type-C cable and accessories detection maintainers: - Benson Leung - - Enric Balletbo i Serra description: | On ChromeOS systems with USB Type C ports, the ChromeOS Embedded Controller is diff --git a/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml b/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml index b386e4128a7915f571251f1d39a538ca6c7c970f..6e1c70e9275ecd7438f681b243fb9933d5817bf8 100644 --- a/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml +++ b/Documentation/devicetree/bindings/i2c/google,cros-ec-i2c-tunnel.yaml @@ -10,7 +10,6 @@ title: I2C bus that tunnels through the ChromeOS EC (cros-ec) maintainers: - Doug Anderson - Benson Leung - - Enric Balletbo i Serra description: | On some ChromeOS board designs we've got a connection to the EC diff --git a/Documentation/devicetree/bindings/iio/proximity/google,cros-ec-mkbp-proximity.yaml b/Documentation/devicetree/bindings/iio/proximity/google,cros-ec-mkbp-proximity.yaml index 099b4be927d4ebeb370f5ebc32446bbbba39c14b..00e3b59641d2a36517168c66ca6f0f5d097d79a6 100644 --- a/Documentation/devicetree/bindings/iio/proximity/google,cros-ec-mkbp-proximity.yaml +++ b/Documentation/devicetree/bindings/iio/proximity/google,cros-ec-mkbp-proximity.yaml @@ -10,7 +10,6 @@ title: ChromeOS EC MKBP Proximity Sensor maintainers: - Stephen Boyd - Benson Leung - - Enric Balletbo i 
Serra description: | Google's ChromeOS EC sometimes has the ability to detect user proximity. diff --git a/Documentation/devicetree/bindings/input/google,cros-ec-keyb.yaml b/Documentation/devicetree/bindings/input/google,cros-ec-keyb.yaml index 5377b232fa10f85862bd280dd7e53dfc7947bb56..e8f137abb03c9a830d8b7a282445a7f5d6cfcadd 100644 --- a/Documentation/devicetree/bindings/input/google,cros-ec-keyb.yaml +++ b/Documentation/devicetree/bindings/input/google,cros-ec-keyb.yaml @@ -10,7 +10,6 @@ title: ChromeOS EC Keyboard maintainers: - Simon Glass - Benson Leung - - Enric Balletbo i Serra description: | Google's ChromeOS EC Keyboard is a simple matrix keyboard diff --git a/Documentation/devicetree/bindings/input/gpio-keys.yaml b/Documentation/devicetree/bindings/input/gpio-keys.yaml index dbe7ecc19ccb94236b5f991a585e127533ccb4ba..7fe1966ea28ac6875a6f63f6bc152cec62b7109c 100644 --- a/Documentation/devicetree/bindings/input/gpio-keys.yaml +++ b/Documentation/devicetree/bindings/input/gpio-keys.yaml @@ -88,12 +88,6 @@ patternProperties: which can be disabled to suppress events from the button. type: boolean - pinctrl-0: - maxItems: 1 - - pinctrl-names: - maxItems: 1 - required: - linux,code diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml index 79d0358e2f612f2487968a03174406f98b75ce05..620f01775e429c3e4b76ccd1ae51f813da41b31d 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml @@ -36,6 +36,7 @@ properties: - renesas,intc-ex-r8a77980 # R-Car V3H - renesas,intc-ex-r8a77990 # R-Car E3 - renesas,intc-ex-r8a77995 # R-Car D3 + - renesas,intc-ex-r8a779a0 # R-Car V3U - const: renesas,irqc '#interrupt-cells': diff --git a/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml b/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml index 28b6b17fe4b267782f35bd0c9bd482542665facf..27092c6a86c491c08823c675fd3ce4fbe952da37 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml @@ -35,6 +35,10 @@ description: contains a specific memory layout, which is documented in chapter 8 of the SiFive U5 Coreplex Series Manual . + The thead,c900-plic is different from sifive,plic-1.0.0: with OpenSBI, the + T-HEAD PLIC implementation requires setting a delegation bit to allow access + from S-mode. The thead,c900-plic compatible therefore distinguishes the two. + maintainers: - Sagar Kadam - Paul Walmsley @@ -42,12 +46,17 @@ properties: compatible: - items: - - enum: - - sifive,fu540-c000-plic - - starfive,jh7100-plic - - canaan,k210-plic - - const: sifive,plic-1.0.0 + oneOf: + - items: + - enum: + - sifive,fu540-c000-plic + - starfive,jh7100-plic + - canaan,k210-plic + - const: sifive,plic-1.0.0 + - items: + - enum: + - allwinner,sun20i-d1-plic + - const: thead,c900-plic reg: maxItems: 1 @@ -62,6 +71,7 @@ properties: interrupts-extended: minItems: 1 + maxItems: 15872 description: Specifies which contexts are connected to the PLIC, with "-1" specifying that a context is not present.
Each node pointed to should be a @@ -90,12 +100,11 @@ examples: #interrupt-cells = <1>; compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0"; interrupt-controller; - interrupts-extended = < - &cpu0_intc 11 - &cpu1_intc 11 &cpu1_intc 9 - &cpu2_intc 11 &cpu2_intc 9 - &cpu3_intc 11 &cpu3_intc 9 - &cpu4_intc 11 &cpu4_intc 9>; + interrupts-extended = <&cpu0_intc 11>, + <&cpu1_intc 11>, <&cpu1_intc 9>, + <&cpu2_intc 11>, <&cpu2_intc 9>, + <&cpu3_intc 11>, <&cpu3_intc 9>, + <&cpu4_intc 11>, <&cpu4_intc 9>; reg = <0xc000000 0x4000000>; riscv,ndev = <10>; }; diff --git a/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml b/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml index 1ef849dc74d7ee3c2110de419c514c8acd2fd1af..e2e6e9aa0fe69e2277eeea38172a1bb98d9f5099 100644 --- a/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml +++ b/Documentation/devicetree/bindings/media/nxp,imx7-mipi-csi2.yaml @@ -81,14 +81,12 @@ properties: data-lanes: description: Note that 'fsl,imx7-mipi-csi2' only supports up to 2 data lines. + minItems: 1 items: - minItems: 1 - maxItems: 4 - items: - - const: 1 - - const: 2 - - const: 3 - - const: 4 + - const: 1 + - const: 2 + - const: 3 + - const: 4 required: - data-lanes diff --git a/Documentation/devicetree/bindings/media/nxp,imx8mq-mipi-csi2.yaml b/Documentation/devicetree/bindings/media/nxp,imx8mq-mipi-csi2.yaml index 9c04fa85ee5c25917ac030f295cb93252632e90d..1b3e1c4b99edc06a6b945219521bd021632b88b1 100644 --- a/Documentation/devicetree/bindings/media/nxp,imx8mq-mipi-csi2.yaml +++ b/Documentation/devicetree/bindings/media/nxp,imx8mq-mipi-csi2.yaml @@ -87,14 +87,12 @@ properties: properties: data-lanes: + minItems: 1 items: - minItems: 1 - maxItems: 4 - items: - - const: 1 - - const: 2 - - const: 3 - - const: 4 + - const: 1 + - const: 2 + - const: 3 + - const: 4 required: - data-lanes diff --git a/Documentation/devicetree/bindings/mfd/cirrus,madera.yaml b/Documentation/devicetree/bindings/mfd/cirrus,madera.yaml index 5dce62a7eff26c8e3fe6f89ed742f9c0b0a48c3f..68c75a517c9213a267062bae79468b9e47f589ee 100644 --- a/Documentation/devicetree/bindings/mfd/cirrus,madera.yaml +++ b/Documentation/devicetree/bindings/mfd/cirrus,madera.yaml @@ -245,8 +245,7 @@ examples: interrupt-controller; #interrupt-cells = <2>; - interrupts = <&host_irq1>; - interrupt-parent = <&gic>; + interrupts = <4 1 0>; gpio-controller; #gpio-cells = <2>; diff --git a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml index 0faa4da6c7c8725882b000c03cda6abaf329059e..d1f53bd449f7b2764d71acb84c53db2297d0dd24 100644 --- a/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml +++ b/Documentation/devicetree/bindings/mfd/google,cros-ec.yaml @@ -8,7 +8,6 @@ title: ChromeOS Embedded Controller maintainers: - Benson Leung - - Enric Balletbo i Serra - Guenter Roeck description: diff --git a/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml b/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml index a4f74bec68a3edc0d759bb5e8e4b20d8ff4a240a..1e69a5a42439b5d3414e80daf9df186f9f1bf454 100644 --- a/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml +++ b/Documentation/devicetree/bindings/mmc/arm,pl18x.yaml @@ -185,6 +185,9 @@ examples: clock-names = "mclk", "apb_pclk"; }; + - | + #include + mmc@80126000 { compatible = "arm,pl18x", "arm,primecell"; reg = <0x80126000 0x1000>; @@ -206,12 +209,12 @@ examples: vqmmc-supply = <&vmmci>; }; + - | mmc@101f6000 { compatible = "arm,pl18x", "arm,primecell"; reg = 
<0x101f6000 0x1000>; clocks = <&sdiclk>, <&pclksdi>; clock-names = "mclk", "apb_pclk"; - interrupt-parent = <&vica>; interrupts = <22>; max-frequency = <400000>; bus-width = <4>; @@ -226,6 +229,7 @@ examples: vmmc-supply = <&vmmc_regulator>; }; + - | mmc@52007000 { compatible = "arm,pl18x", "arm,primecell"; arm,primecell-periphid = <0x10153180>; diff --git a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml index fb547e26c6764b4985294cf89b419949691d1830..401ab7cdb379b60c4766adbcb194ae00857e5fc8 100644 --- a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml +++ b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml @@ -76,33 +76,31 @@ properties: M_CAN user manual for details. $ref: /schemas/types.yaml#/definitions/int32-array items: - items: - - description: The 'offset' is an address offset of the Message RAM where - the following elements start from. This is usually set to 0x0 if - you're using a private Message RAM. - default: 0 - - description: 11-bit Filter 0-128 elements / 0-128 words - minimum: 0 - maximum: 128 - - description: 29-bit Filter 0-64 elements / 0-128 words - minimum: 0 - maximum: 64 - - description: Rx FIFO 0 0-64 elements / 0-1152 words - minimum: 0 - maximum: 64 - - description: Rx FIFO 1 0-64 elements / 0-1152 words - minimum: 0 - maximum: 64 - - description: Rx Buffers 0-64 elements / 0-1152 words - minimum: 0 - maximum: 64 - - description: Tx Event FIFO 0-32 elements / 0-64 words - minimum: 0 - maximum: 32 - - description: Tx Buffers 0-32 elements / 0-576 words - minimum: 0 - maximum: 32 - maxItems: 1 + - description: The 'offset' is an address offset of the Message RAM where + the following elements start from. This is usually set to 0x0 if + you're using a private Message RAM. 
+ default: 0 + - description: 11-bit Filter 0-128 elements / 0-128 words + minimum: 0 + maximum: 128 + - description: 29-bit Filter 0-64 elements / 0-128 words + minimum: 0 + maximum: 64 + - description: Rx FIFO 0 0-64 elements / 0-1152 words + minimum: 0 + maximum: 64 + - description: Rx FIFO 1 0-64 elements / 0-1152 words + minimum: 0 + maximum: 64 + - description: Rx Buffers 0-64 elements / 0-1152 words + minimum: 0 + maximum: 64 + - description: Tx Event FIFO 0-32 elements / 0-64 words + minimum: 0 + maximum: 32 + - description: Tx Buffers 0-32 elements / 0-576 words + minimum: 0 + maximum: 32 power-domains: description: diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt index 0968b40aef1e8147a63087f8f360107285fb900d..e3501bfa22e904fe52b7944a37ab541eaf531fd2 100644 --- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt +++ b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt @@ -31,7 +31,7 @@ tcan4x5x: tcan4x5x@0 { #address-cells = <1>; #size-cells = <1>; spi-max-frequency = <10000000>; - bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>; + bosch,mram-cfg = <0x0 0 0 16 0 0 1 1>; interrupt-parent = <&gpio1>; interrupts = <14 IRQ_TYPE_LEVEL_LOW>; device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index 47b5f728701d28b0251ac30f6c31ddb1284869c4..34c5463abceccb5820e90aa76a2db730d754ca48 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -17,9 +17,8 @@ properties: description: Specifies the MAC address that was assigned to the network device. $ref: /schemas/types.yaml#/definitions/uint8-array - items: - - minItems: 6 - maxItems: 6 + minItems: 6 + maxItems: 6 mac-address: description: @@ -28,9 +27,8 @@ properties: to the device by the boot program is different from the local-mac-address property. $ref: /schemas/types.yaml#/definitions/uint8-array - items: - - minItems: 6 - maxItems: 6 + minItems: 6 + maxItems: 6 max-frame-size: $ref: /schemas/types.yaml#/definitions/uint32 @@ -164,33 +162,30 @@ properties: type: array then: deprecated: true - minItems: 1 - maxItems: 1 items: - items: - - minimum: 0 - maximum: 31 - description: - Emulated PHY ID, choose any but unique to the all - specified fixed-links - - - enum: [0, 1] - description: - Duplex configuration. 0 for half duplex or 1 for - full duplex - - - enum: [10, 100, 1000, 2500, 10000] - description: - Link speed in Mbits/sec. - - - enum: [0, 1] - description: - Pause configuration. 0 for no pause, 1 for pause - - - enum: [0, 1] - description: - Asymmetric pause configuration. 0 for no asymmetric - pause, 1 for asymmetric pause + - minimum: 0 + maximum: 31 + description: + Emulated PHY ID, choose any but unique to the all + specified fixed-links + + - enum: [0, 1] + description: + Duplex configuration. 0 for half duplex or 1 for + full duplex + + - enum: [10, 100, 1000, 2500, 10000] + description: + Link speed in Mbits/sec. + + - enum: [0, 1] + description: + Pause configuration. 0 for no pause, 1 for pause + + - enum: [0, 1] + description: + Asymmetric pause configuration. 
0 for no asymmetric + pause, 1 for asymmetric pause - if: diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index b86edf67ce626abe07e0cb124c2bd38828412550..58ecc62adfaaebe3fb6c7b9c1bce74f538f540da 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -107,6 +107,10 @@ properties: - const: imem - const: config + qcom,qmp: + $ref: /schemas/types.yaml#/definitions/phandle + description: phandle to the AOSS side-channel message RAM + qcom,smem-states: $ref: /schemas/types.yaml#/definitions/phandle-array description: State bits used in by the AP to signal the modem. @@ -222,6 +226,8 @@ examples: "imem", "config"; + qcom,qmp = <&aoss_qmp>; + qcom,smem-states = <&ipa_smp2p_out 0>, <&ipa_smp2p_out 1>; qcom,smem-state-names = "ipa-clock-enabled-valid", diff --git a/Documentation/devicetree/bindings/nvmem/nvmem.yaml b/Documentation/devicetree/bindings/nvmem/nvmem.yaml index 456fb808100a1bb93e04514076e843dbfdbbf38b..43ed7e32e5accf264608b6043ec955bc6e19ae56 100644 --- a/Documentation/devicetree/bindings/nvmem/nvmem.yaml +++ b/Documentation/devicetree/bindings/nvmem/nvmem.yaml @@ -50,16 +50,15 @@ patternProperties: Offset and size in bytes within the storage device. bits: - maxItems: 1 + $ref: /schemas/types.yaml#/definitions/uint32-array items: - items: - - minimum: 0 - maximum: 7 - description: - Offset in bit within the address range specified by reg. - - minimum: 1 - description: - Size in bit within the address range specified by reg. + - minimum: 0 + maximum: 7 + description: + Offset in bit within the address range specified by reg. + - minimum: 1 + description: + Size in bit within the address range specified by reg. required: - reg diff --git a/Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.yaml b/Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.yaml index 80020539c3bbeaf55a5ab9f30ddbfe8c126a5e1d..5cd512b7d5ba31882d4fc5369f19384913e2b90c 100644 --- a/Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.yaml +++ b/Documentation/devicetree/bindings/pinctrl/cirrus,lochnagar.yaml @@ -51,15 +51,6 @@ properties: appropriate of the LOCHNAGARx_PIN_NUM_GPIOS define, see [3]. maxItems: 1 - pinctrl-0: - description: - A phandle to the default pinctrl state. - - pinctrl-names: - description: - A pinctrl state named "default" must be defined. - const: default - pin-settings: type: object patternProperties: diff --git a/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml b/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml index e50d7ad5c2297909b32499add4f7ab677699a3df..c85f759ae5a3371408358639abb7214fe74e2721 100644 --- a/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml +++ b/Documentation/devicetree/bindings/pinctrl/cirrus,madera.yaml @@ -30,16 +30,6 @@ description: | Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt properties: - pinctrl-0: - description: - A phandle to the node containing the subnodes containing default - configurations. - - pinctrl-names: - description: - A pinctrl state named "default" must be defined. - const: default - pin-settings: description: One subnode is required to contain the default settings. 
It diff --git a/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml b/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml index 3dd22220cb5f6facc101a795ceb8e865857c61fd..a72d5c7215161af19348854dcc096496934bda47 100644 --- a/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml +++ b/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml @@ -43,7 +43,7 @@ properties: priority: $ref: /schemas/types.yaml#/definitions/uint32 description: | - A priority ranging from 0 to 255 (default 128) according to the following guidelines: + A priority ranging from 0 to 255 (default 129) according to the following guidelines: 0: Restart handler of last resort, with limited restart capabilities. 128: Default restart handler; use if no other restart handler is expected to be available, @@ -51,7 +51,7 @@ properties: 255: Highest priority restart handler, will preempt all other restart handlers. minimum: 0 maximum: 255 - default: 128 + default: 129 active-delay: $ref: /schemas/types.yaml#/definitions/uint32 diff --git a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml index 2359f541b770a8ad19e209657caca029e4195613..764717ce18733c5f6e2cee028df99a7e544327b7 100644 --- a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml +++ b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml @@ -127,6 +127,7 @@ examples: st,syscfg = <&pwrcfg 0x00 0x100>; }; + - | #include #include rtc@5c004000 { diff --git a/Documentation/devicetree/bindings/sound/samsung-i2s.yaml b/Documentation/devicetree/bindings/sound/samsung-i2s.yaml index 2e3628ef48df08c072a0a7c2a170ecb3f0fdae13..84c4d6cba521c5b976e6af4d8d35be5ec4976a27 100644 --- a/Documentation/devicetree/bindings/sound/samsung-i2s.yaml +++ b/Documentation/devicetree/bindings/sound/samsung-i2s.yaml @@ -110,12 +110,6 @@ properties: Internal DMA register base address of the audio subsystem (used in secondary sound source). - pinctrl-0: - description: Should specify pin control groups used for this controller. - - pinctrl-names: - const: default - power-domains: maxItems: 1 diff --git a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml index 5dd209206e8806a1203928f918f001d72d9447e9..3ec2d7b83775a9c1be30e9629e7bb4933acfd2cf 100644 --- a/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml +++ b/Documentation/devicetree/bindings/spi/spi-peripheral-props.yaml @@ -23,8 +23,9 @@ properties: minItems: 1 maxItems: 256 items: - minimum: 0 - maximum: 256 + items: + - minimum: 0 + maximum: 256 description: Chip select used by the device. 
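The gpio-restart hunk above corrects the documented default priority to 129. As a minimal, hypothetical sketch of what that number means on the driver side (the exdev_* names are invented for illustration and are not part of this patch), a restart handler registers through the kernel's notifier API and competes by priority::

    #include <linux/init.h>
    #include <linux/notifier.h>
    #include <linux/reboot.h>

    /* Runs at priority 129: preempts the generic priority-128 handler,
     * yields to anything registered with a higher priority. */
    static int exdev_restart(struct notifier_block *nb,
                             unsigned long mode, void *cmd)
    {
            /* assert the board's reset line here */
            return NOTIFY_DONE;
    }

    static struct notifier_block exdev_restart_nb = {
            .notifier_call = exdev_restart,
            .priority = 129,
    };

    static int __init exdev_init(void)
    {
            return register_restart_handler(&exdev_restart_nb);
    }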
diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml index 9af1b0f4eceaf482b36592afa246008a0976e333..091792ba993e858919db308f0866ecea3e1f6059 100644 --- a/Documentation/devicetree/bindings/trivial-devices.yaml +++ b/Documentation/devicetree/bindings/trivial-devices.yaml @@ -31,7 +31,7 @@ properties: - enum: # SMBus/I2C Digital Temperature Sensor in 6-Pin SOT with SMBus Alert and Over Temperature Pin - ad,ad7414 - # ADM9240: Complete System Hardware Monitor for uProcessor-Based Systems + # ADM9240: Complete System Hardware Monitor for uProcessor-Based Systems - ad,adm9240 # AD5110 - Nonvolatile Digital Potentiometer - adi,ad5110 @@ -43,7 +43,7 @@ properties: - adi,adp5589 # AMS iAQ-Core VOC Sensor - ams,iaq-core - # i2c serial eeprom (24cxx) + # i2c serial eeprom (24cxx) - at,24c08 # i2c trusted platform module (TPM) - atmel,at97sc3204t @@ -303,9 +303,9 @@ properties: - skyworks,sky81452 # Socionext SynQuacer TPM MMIO module - socionext,synquacer-tpm-mmio - # i2c serial eeprom (24cxx) - - sparkfun,qwiic-joystick # SparkFun Qwiic Joystick (COM-15168) with i2c interface + - sparkfun,qwiic-joystick + # i2c serial eeprom (24cxx) - st,24c256 # Ambient Light Sensor with SMBUS/Two Wire Serial Interface - taos,tsl2550 diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml index c48ce3c549515d39930ef37bdcf8063f79d1d106..294093d45a2308fb51b6453e9ca791210ed125df 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.yaml +++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml @@ -25,6 +25,8 @@ patternProperties: # Keep list in alphabetical order. "^70mai,.*": description: 70mai Co., Ltd. + "^8dev,.*": + description: 8devices, UAB "^abb,.*": description: ABB "^abilis,.*": @@ -441,6 +443,8 @@ patternProperties: description: Freescale Semiconductor "^fujitsu,.*": description: Fujitsu Ltd. + "^fxtec,.*": + description: FX Technology Ltd. "^gardena,.*": description: GARDENA GmbH "^gateworks,.*": @@ -515,6 +519,8 @@ patternProperties: description: HannStar Display Co. "^holtek,.*": description: Holtek Semiconductor, Inc. + "^huawei,.*": + description: Huawei Technologies Co., Ltd. "^hugsun,.*": description: Shenzhen Hugsun Technology Co. Ltd. "^hwacom,.*": @@ -1207,6 +1213,8 @@ patternProperties: description: THine Electronics, Inc. "^thingyjp,.*": description: thingy.jp + "^thundercomm,.*": + description: Thundercomm Technology Co., Ltd. "^ti,.*": description: Texas Instruments "^tianma,.*": @@ -1334,6 +1342,8 @@ patternProperties: description: Wiligear, Ltd. "^winbond,.*": description: Winbond Electronics corp. + "^wingtech,.*": + description: Wingtech Technology Co., Ltd. "^winlink,.*": description: WinLink Co., Ltd "^winstar,.*": diff --git a/Documentation/driver-api/firewire.rst b/Documentation/driver-api/firewire.rst index 94a2d7f01d99924e0d338b57e09387ecb230c41a..d3cfa73cbb2b47e624961f3e15aadb6528dfec37 100644 --- a/Documentation/driver-api/firewire.rst +++ b/Documentation/driver-api/firewire.rst @@ -19,7 +19,7 @@ of kernel interfaces is available via exported symbols in `firewire-core` module Firewire char device data structures ==================================== -.. include:: /ABI/stable/firewire-cdev +.. include:: ../ABI/stable/firewire-cdev :literal: .. 
kernel-doc:: include/uapi/linux/firewire-cdev.h @@ -28,7 +28,7 @@ Firewire char device data structures Firewire device probing and sysfs interfaces ============================================ -.. include:: /ABI/stable/sysfs-bus-firewire +.. include:: ../ABI/stable/sysfs-bus-firewire :literal: .. kernel-doc:: drivers/firewire/core-device.c diff --git a/Documentation/filesystems/netfs_library.rst b/Documentation/filesystems/netfs_library.rst index 136f8da3d0e24e1a8c13f7de2d5d84eb6edc1650..4f373a8ec47bc140aff7d0f9fd25efd63f0fa7aa 100644 --- a/Documentation/filesystems/netfs_library.rst +++ b/Documentation/filesystems/netfs_library.rst @@ -462,6 +462,10 @@ operation table looks like the following:: struct iov_iter *iter, netfs_io_terminated_t term_func, void *term_func_priv); + + int (*query_occupancy)(struct netfs_cache_resources *cres, + loff_t start, size_t len, size_t granularity, + loff_t *_data_start, size_t *_data_len); }; With a termination handler function pointer:: @@ -536,6 +540,18 @@ The methods defined in the table are: indicating whether the termination is definitely happening in the caller's context. + * ``query_occupancy()`` + + [Required] Called to find out where the next piece of data is within a + particular region of the cache. The start and length of the region to be + queried are passed in, along with the granularity to which the answer needs + to be aligned. The function passes back the start and length of the data, + if any, available within that region. Note that there may be a hole at the + front. + + It returns 0 if some data was found, -ENODATA if there was no usable data + within the region or -ENOBUFS if there is no caching on this file. + Note that these methods are passed a pointer to the cache resource structure, not the read request structure as they could be used in other situations where there isn't a read request structure as well, such as writing dirty data to the diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index da138dd398831304eba31fe1a13f8938cfe791db..a1212b5b302683f31c58f7be45e84281a017ce60 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -300,30 +300,6 @@ Contact: Daniel Vetter, Noralf Tronnes Level: Advanced -Garbage collect fbdev scrolling acceleration --------------------------------------------- - -Scroll acceleration has been disabled in fbcon. Now it works as the old -SCROLL_REDRAW mode. A ton of code was removed in fbcon.c and the hook bmove was -removed from fbcon_ops. -Remaining tasks: - -- a bunch of the hooks in fbcon_ops could be removed or simplified by calling - directly instead of the function table (with a switch on p->rotate) - -- fb_copyarea is unused after this, and can be deleted from all drivers - -- after that, fb_copyarea can be deleted from fb_ops in include/linux/fb.h as - well as cfb_copyarea - -Note that not all acceleration code can be deleted, since clearing and cursor -support is still accelerated, which might be good candidates for further -deletion projects. - -Contact: Daniel Vetter - -Level: Intermediate - idr_init_base() --------------- diff --git a/Documentation/index.rst b/Documentation/index.rst index 2b4de3926858825c91ba01430135c046c41f2efe..b58692d687f671c9867e50b7f067317a72bb483f 100644 --- a/Documentation/index.rst +++ b/Documentation/index.rst @@ -166,6 +166,7 @@ to ReStructured Text format, or are simply too old. .. 
toctree:: :maxdepth: 2 + tools/index staging/index watch_queue diff --git a/Documentation/kernel-hacking/locking.rst b/Documentation/kernel-hacking/locking.rst index e6cd40663ea52e32b885e01579c89f2bf627b3f4..4cbd50edf277827d81ae9654ff04594af5cc0234 100644 --- a/Documentation/kernel-hacking/locking.rst +++ b/Documentation/kernel-hacking/locking.rst @@ -295,7 +295,7 @@ Pete Zaitcev gives the following summary: - If you are in a process context (any syscall) and want to lock other process out, use a mutex. You can take a mutex and sleep - (``copy_from_user*(`` or ``kmalloc(x,GFP_KERNEL)``). + (``copy_from_user()`` or ``kmalloc(x,GFP_KERNEL)``). - Otherwise (== data can be touched in an interrupt), use spin_lock_irqsave() and diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst index b7f98930d38d396501f11851434beef189da59ee..1bd687b971046b34df459cf085860598a29777fd 100644 --- a/Documentation/riscv/vm-layout.rst +++ b/Documentation/riscv/vm-layout.rst @@ -47,12 +47,12 @@ RISC-V Linux Kernel SV39 | Kernel-space virtual memory, shared between all processes: ____________________________________________________________|___________________________________________________________ | | | | - ffffffc000000000 | -256 GB | ffffffc7ffffffff | 32 GB | kasan - ffffffcefee00000 | -196 GB | ffffffcefeffffff | 2 MB | fixmap - ffffffceff000000 | -196 GB | ffffffceffffffff | 16 MB | PCI io - ffffffcf00000000 | -196 GB | ffffffcfffffffff | 4 GB | vmemmap - ffffffd000000000 | -192 GB | ffffffdfffffffff | 64 GB | vmalloc/ioremap space - ffffffe000000000 | -128 GB | ffffffff7fffffff | 124 GB | direct mapping of all physical memory + ffffffc6fee00000 | -228 GB | ffffffc6feffffff | 2 MB | fixmap + ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io + ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap + ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space + ffffffd800000000 | -160 GB | fffffff6ffffffff | 124 GB | direct mapping of all physical memory + fffffff700000000 | -36 GB | fffffffeffffffff | 32 GB | kasan __________________|____________|__________________|_________|____________________________________________________________ | | diff --git a/Documentation/staging/tee.rst b/Documentation/staging/tee.rst index 3c63d8dcd61eb7b4ed65e202087e7dfb0812b711..498343c7ab088e8ed6b981ead561dd94f39fbff6 100644 --- a/Documentation/staging/tee.rst +++ b/Documentation/staging/tee.rst @@ -255,7 +255,7 @@ The following picture shows a high level overview of AMD-TEE:: +--------------------------+ +---------+--------------------+ At the lowest level (in x86), the AMD Secure Processor (ASP) driver uses the -CPU to PSP mailbox regsister to submit commands to the PSP. The format of the +CPU to PSP mailbox register to submit commands to the PSP. The format of the command buffer is opaque to the ASP driver. Its role is to submit commands to the secure processor and return results to the AMD-TEE driver. The interface between AMD-TEE driver and AMD Secure Processor driver can be found in [6]. @@ -290,7 +290,7 @@ cancel_req driver callback is not supported by AMD-TEE. The GlobalPlatform TEE Client API [5] can be used by the user space (client) to talk to AMD's TEE. AMD's TEE provides a secure environment for loading, opening -a session, invoking commands and clossing session with TA. +a session, invoking commands and closing session with TA.
References ========== diff --git a/Documentation/tools/index.rst b/Documentation/tools/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..0bb1e61bdcc0fe4a0848988e19010617664e9d6f --- /dev/null +++ b/Documentation/tools/index.rst @@ -0,0 +1,20 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============ +Kernel tools +============ + +This book covers user-space tools that are shipped with the kernel source; +more additions are needed here: + +.. toctree:: + :maxdepth: 1 + + rtla/index + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/tools/rtla/index.rst b/Documentation/tools/rtla/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..840f0bf3e8036f64b006146e195383cc69a0f593 --- /dev/null +++ b/Documentation/tools/rtla/index.rst @@ -0,0 +1,26 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================================ +The realtime Linux analysis tool +================================ + +RTLA provides a set of tools for the analysis of the kernel's realtime +behavior on specific hardware. + +.. toctree:: + :maxdepth: 1 + + rtla + rtla-osnoise + rtla-osnoise-hist + rtla-osnoise-top + rtla-timerlat + rtla-timerlat-hist + rtla-timerlat-top + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index b3166c4a78678fa22f63cda44b5431c8b5d88358..45b8c56af67a74230cea7ed1a9c7eeccf558c436 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -3370,7 +3370,7 @@ one of the latency tracers, you will get the following results. Instances --------- -In the tracefs tracing directory is a directory called "instances". +In the tracefs tracing directory, there is a directory called "instances". This directory can have new directories created inside of it using mkdir, and removing directories with rmdir. The directory created with mkdir in this directory will already contain files and other diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 687efcf245c13d34196044e2500354224f0fa538..e6fce2cbd99ed475c7f5ca3bdd1d3c2f2acaafd6 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -115,6 +115,7 @@ Code Seq# Include File Comments 'B' 00-1F linux/cciss_ioctl.h conflict! 'B' 00-0F include/linux/pmu.h conflict! 'B' C0-FF advanced bbus +'B' 00-0F xen/xenbus_dev.h conflict! 'C' all linux/soundcard.h conflict! 'C' 01-2F linux/capi.h conflict! 'C' F0-FF drivers/net/wan/cosa.h conflict! @@ -134,6 +135,7 @@ Code Seq# Include File Comments 'F' 80-8F linux/arcfb.h conflict! 'F' DD video/sstfb.h conflict! 'G' 00-3F drivers/misc/sgi-gru/grulib.h conflict! +'G' 00-0F xen/gntalloc.h, xen/gntdev.h conflict! 'H' 00-7F linux/hiddev.h conflict! 'H' 00-0F linux/hidraw.h conflict! 'H' 01 linux/mei.h conflict! @@ -176,6 +178,7 @@ Code Seq# Include File Comments 'P' 60-6F sound/sscape_ioctl.h conflict! 'P' 00-0F drivers/usb/class/usblp.c conflict! 'P' 01-09 drivers/misc/pci_endpoint_test.c conflict! +'P' 00-0F xen/privcmd.h conflict! 'Q' all linux/soundcard.h 'R' 00-1F linux/random.h conflict! 'R' 01 linux/rfkill.h conflict! 
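The ioctl-number table above only reserves a magic character and a sequence range; the concrete request codes come from the _IO* helpers in <linux/ioctl.h>. A minimal sketch, assuming a hypothetical magic 'X' and exdev_* names that do not appear in the registry::

    #include <linux/ioctl.h>
    #include <linux/types.h>

    struct exdev_config {
            __u32 flags;    /* hypothetical payload */
    };

    /* "Code" 'X', "Seq#" 0x00-0x02 in the table's terms. */
    #define EXDEV_IOC_MAGIC  'X'
    #define EXDEV_IOC_RESET  _IO(EXDEV_IOC_MAGIC, 0x00)
    #define EXDEV_IOC_GET    _IOR(EXDEV_IOC_MAGIC, 0x01, struct exdev_config)
    #define EXDEV_IOC_SET    _IOW(EXDEV_IOC_MAGIC, 0x02, struct exdev_config)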
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index d3791a14eb9ad02bccaec50902e4c5cb6ab5256c..a4267104db50a490b481729e282017e7f453754c 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -3268,6 +3268,7 @@ number. :Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device, KVM_CAP_VCPU_ATTRIBUTES for vcpu device + KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device (no set) :Type: device ioctl, vm ioctl, vcpu ioctl :Parameters: struct kvm_device_attr :Returns: 0 on success, -1 on error @@ -3302,7 +3303,8 @@ transferred is defined by the particular attribute. ------------------------ :Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device, - KVM_CAP_VCPU_ATTRIBUTES for vcpu device + KVM_CAP_VCPU_ATTRIBUTES for vcpu device + KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device :Type: device ioctl, vm ioctl, vcpu ioctl :Parameters: struct kvm_device_attr :Returns: 0 on success, -1 on error @@ -5545,8 +5547,8 @@ the trailing ``'\0'``, is indicated by ``name_size`` in the header. The Stats Data block contains an array of 64-bit values in the same order as the descriptors in Descriptors block. -4.42 KVM_GET_XSAVE2 ------------------- +4.134 KVM_GET_XSAVE2 +-------------------- :Capability: KVM_CAP_XSAVE2 :Architectures: x86 @@ -7363,7 +7365,7 @@ trap and emulate MSRs that are outside of the scope of KVM as well as limit the attack surface on KVM's MSR emulation code. 8.28 KVM_CAP_ENFORCE_PV_FEATURE_CPUID ------------------------------ +------------------------------------- Architectures: x86 diff --git a/Documentation/vm/cleancache.rst b/Documentation/vm/cleancache.rst deleted file mode 100644 index 68cba9131c318f26fb071e830e3307670d75a227..0000000000000000000000000000000000000000 --- a/Documentation/vm/cleancache.rst +++ /dev/null @@ -1,296 +0,0 @@ -.. _cleancache: - -========== -Cleancache -========== - -Motivation -========== - -Cleancache is a new optional feature provided by the VFS layer that -potentially dramatically increases page cache effectiveness for -many workloads in many environments at a negligible cost. - -Cleancache can be thought of as a page-granularity victim cache for clean -pages that the kernel's pageframe replacement algorithm (PFRA) would like -to keep around, but can't since there isn't enough memory. So when the -PFRA "evicts" a page, it first attempts to use cleancache code to -put the data contained in that page into "transcendent memory", memory -that is not directly accessible or addressable by the kernel and is -of unknown and possibly time-varying size. - -Later, when a cleancache-enabled filesystem wishes to access a page -in a file on disk, it first checks cleancache to see if it already -contains it; if it does, the page of data is copied into the kernel -and a disk access is avoided. - -Transcendent memory "drivers" for cleancache are currently implemented -in Xen (using hypervisor memory) and zcache (using in-kernel compressed -memory) and other implementations are in development. - -:ref:`FAQs ` are included below. - -Implementation Overview -======================= - -A cleancache "backend" that provides transcendent memory registers itself -to the kernel's cleancache "frontend" by calling cleancache_register_ops, -passing a pointer to a cleancache_ops structure with funcs set appropriately. -The functions provided must conform to certain semantics as follows: - -Most important, cleancache is "ephemeral". 
Pages which are copied into -cleancache have an indefinite lifetime which is completely unknowable -by the kernel and so may or may not still be in cleancache at any later time. -Thus, as its name implies, cleancache is not suitable for dirty pages. -Cleancache has complete discretion over what pages to preserve and what -pages to discard and when. - -Mounting a cleancache-enabled filesystem should call "init_fs" to obtain a -pool id which, if positive, must be saved in the filesystem's superblock; -a negative return value indicates failure. A "put_page" will copy a -(presumably about-to-be-evicted) page into cleancache and associate it with -the pool id, a file key, and a page index into the file. (The combination -of a pool id, a file key, and an index is sometimes called a "handle".) -A "get_page" will copy the page, if found, from cleancache into kernel memory. -An "invalidate_page" will ensure the page no longer is present in cleancache; -an "invalidate_inode" will invalidate all pages associated with the specified -file; and, when a filesystem is unmounted, an "invalidate_fs" will invalidate -all pages in all files specified by the given pool id and also surrender -the pool id. - -An "init_shared_fs", like init_fs, obtains a pool id but tells cleancache -to treat the pool as shared using a 128-bit UUID as a key. On systems -that may run multiple kernels (such as hard partitioned or virtualized -systems) that may share a clustered filesystem, and where cleancache -may be shared among those kernels, calls to init_shared_fs that specify the -same UUID will receive the same pool id, thus allowing the pages to -be shared. Note that any security requirements must be imposed outside -of the kernel (e.g. by "tools" that control cleancache). Or a -cleancache implementation can simply disable shared_init by always -returning a negative value. - -If a get_page is successful on a non-shared pool, the page is invalidated -(thus making cleancache an "exclusive" cache). On a shared pool, the page -is NOT invalidated on a successful get_page so that it remains accessible to -other sharers. The kernel is responsible for ensuring coherency between -cleancache (shared or not), the page cache, and the filesystem, using -cleancache invalidate operations as required. - -Note that cleancache must enforce put-put-get coherency and get-get -coherency. For the former, if two puts are made to the same handle but -with different data, say AAA by the first put and BBB by the second, a -subsequent get can never return the stale data (AAA). For get-get coherency, -if a get for a given handle fails, subsequent gets for that handle will -never succeed unless preceded by a successful put with that handle. - -Last, cleancache provides no SMP serialization guarantees; if two -different Linux threads are simultaneously putting and invalidating a page -with the same handle, the results are indeterminate. Callers must -lock the page to ensure serial behavior. - -Cleancache Performance Metrics -============================== - -If properly configured, monitoring of cleancache is done via debugfs in -the `/sys/kernel/debug/cleancache` directory. The effectiveness of cleancache -can be measured (across all filesystems) with: - -``succ_gets`` - number of gets that were successful - -``failed_gets`` - number of gets that failed - -``puts`` - number of puts attempted (all "succeed") - -``invalidates`` - number of invalidates attempted - -A backend implementation may provide additional metrics. - -.. 
_faq: - -FAQ -=== - -* Where's the value? (Andrew Morton) - -Cleancache provides a significant performance benefit to many workloads -in many environments with negligible overhead by improving the -effectiveness of the pagecache. Clean pagecache pages are -saved in transcendent memory (RAM that is otherwise not directly -addressable to the kernel); fetching those pages later avoids "refaults" -and thus disk reads. - -Cleancache (and its sister code "frontswap") provide interfaces for -this transcendent memory (aka "tmem"), which conceptually lies between -fast kernel-directly-addressable RAM and slower DMA/asynchronous devices. -Disallowing direct kernel or userland reads/writes to tmem -is ideal when data is transformed to a different form and size (such -as with compression) or secretly moved (as might be useful for write- -balancing for some RAM-like devices). Evicted page-cache pages (and -swap pages) are a great use for this kind of slower-than-RAM-but-much- -faster-than-disk transcendent memory, and the cleancache (and frontswap) -"page-object-oriented" specification provides a nice way to read and -write -- and indirectly "name" -- the pages. - -In the virtual case, the whole point of virtualization is to statistically -multiplex physical resources across the varying demands of multiple -virtual machines. This is really hard to do with RAM and efforts to -do it well with no kernel change have essentially failed (except in some -well-publicized special-case workloads). Cleancache -- and frontswap -- -with a fairly small impact on the kernel, provide a huge amount -of flexibility for more dynamic, flexible RAM multiplexing. -Specifically, the Xen Transcendent Memory backend allows otherwise -"fallow" hypervisor-owned RAM to not only be "time-shared" between multiple -virtual machines, but the pages can be compressed and deduplicated to -optimize RAM utilization. And when guest OS's are induced to surrender -underutilized RAM (e.g. with "self-ballooning"), page cache pages -are the first to go, and cleancache allows those pages to be -saved and reclaimed if overall host system memory conditions allow. - -And the identical interface used for cleancache can be used in -physical systems as well. The zcache driver acts as a memory-hungry -device that stores pages of data in a compressed state. And -the proposed "RAMster" driver shares RAM across multiple physical -systems. - -* Why does cleancache have its sticky fingers so deep inside the - filesystems and VFS? (Andrew Morton and Christoph Hellwig) - -The core hooks for cleancache in VFS are in most cases a single line -and the minimum set are placed precisely where needed to maintain -coherency (via cleancache_invalidate operations) between cleancache, -the page cache, and disk. All hooks compile into nothingness if -cleancache is config'ed off and turn into a function-pointer- -compare-to-NULL if config'ed on but no backend claims the ops -functions, or to a compare-struct-element-to-negative if a -backend claims the ops functions but a filesystem doesn't enable -cleancache. - -Some filesystems are built entirely on top of VFS and the hooks -in VFS are sufficient, so don't require an "init_fs" hook; the -initial implementation of cleancache didn't provide this hook. -But for some filesystems (such as btrfs), the VFS hooks are -incomplete and one or more hooks in fs-specific code are required. -And for some other filesystems, such as tmpfs, cleancache may -be counterproductive. 
So it seemed prudent to require a filesystem -to "opt in" to use cleancache, which requires adding a hook in -each filesystem. Not all filesystems are supported by cleancache -only because they haven't been tested. The existing set should -be sufficient to validate the concept, the opt-in approach means -that untested filesystems are not affected, and the hooks in the -existing filesystems should make it very easy to add more -filesystems in the future. - -The total impact of the hooks to existing fs and mm files is only -about 40 lines added (not counting comments and blank lines). - -* Why not make cleancache asynchronous and batched so it can more - easily interface with real devices with DMA instead of copying each - individual page? (Minchan Kim) - -The one-page-at-a-time copy semantics simplifies the implementation -on both the frontend and backend and also allows the backend to -do fancy things on-the-fly like page compression and -page deduplication. And since the data is "gone" (copied into/out -of the pageframe) before the cleancache get/put call returns, -a great deal of race conditions and potential coherency issues -are avoided. While the interface seems odd for a "real device" -or for real kernel-addressable RAM, it makes perfect sense for -transcendent memory. - -* Why is non-shared cleancache "exclusive"? And where is the - page "invalidated" after a "get"? (Minchan Kim) - -The main reason is to free up space in transcendent memory and -to avoid unnecessary cleancache_invalidate calls. If you want inclusive, -the page can be "put" immediately following the "get". If -put-after-get for inclusive becomes common, the interface could -be easily extended to add a "get_no_invalidate" call. - -The invalidate is done by the cleancache backend implementation. - -* What's the performance impact? - -Performance analysis has been presented at OLS'09 and LCA'10. -Briefly, performance gains can be significant on most workloads, -especially when memory pressure is high (e.g. when RAM is -overcommitted in a virtual workload); and because the hooks are -invoked primarily in place of or in addition to a disk read/write, -overhead is negligible even in worst case workloads. Basically -cleancache replaces I/O with memory-copy-CPU-overhead; on older -single-core systems with slow memory-copy speeds, cleancache -has little value, but in newer multicore machines, especially -consolidated/virtualized machines, it has great value. - -* How do I add cleancache support for filesystem X? (Boaz Harrash) - -Filesystems that are well-behaved and conform to certain -restrictions can utilize cleancache simply by making a call to -cleancache_init_fs at mount time. Unusual, misbehaving, or -poorly layered filesystems must either add additional hooks -and/or undergo extensive additional testing... or should just -not enable the optional cleancache. - -Some points for a filesystem to consider: - - - The FS should be block-device-based (e.g. a ram-based FS such - as tmpfs should not enable cleancache) - - To ensure coherency/correctness, the FS must ensure that all - file removal or truncation operations either go through VFS or - add hooks to do the equivalent cleancache "invalidate" operations - - To ensure coherency/correctness, either inode numbers must - be unique across the lifetime of the on-disk file OR the - FS must provide an "encode_fh" function. - - The FS must call the VFS superblock alloc and deactivate routines - or add hooks to do the equivalent cleancache calls done there. 
- - To maximize performance, all pages fetched from the FS should - go through the do_mpag_readpage routine or the FS should add - hooks to do the equivalent (cf. btrfs) - - Currently, the FS blocksize must be the same as PAGESIZE. This - is not an architectural restriction, but no backends currently - support anything different. - - A clustered FS should invoke the "shared_init_fs" cleancache - hook to get best performance for some backends. - -* Why not use the KVA of the inode as the key? (Christoph Hellwig) - -If cleancache would use the inode virtual address instead of -inode/filehandle, the pool id could be eliminated. But, this -won't work because cleancache retains pagecache data pages -persistently even when the inode has been pruned from the -inode unused list, and only invalidates the data page if the file -gets removed/truncated. So if cleancache used the inode kva, -there would be potential coherency issues if/when the inode -kva is reused for a different file. Alternately, if cleancache -invalidated the pages when the inode kva was freed, much of the value -of cleancache would be lost because the cache of pages in cleanache -is potentially much larger than the kernel pagecache and is most -useful if the pages survive inode cache removal. - -* Why is a global variable required? - -The cleancache_enabled flag is checked in all of the frequently-used -cleancache hooks. The alternative is a function call to check a static -variable. Since cleancache is enabled dynamically at runtime, systems -that don't enable cleancache would suffer thousands (possibly -tens-of-thousands) of unnecessary function calls per second. So the -global variable allows cleancache to be enabled by default at compile -time, but have insignificant performance impact when cleancache remains -disabled at runtime. - -* Does cleanache work with KVM? - -The memory model of KVM is sufficiently different that a cleancache -backend may have less value for KVM. This remains to be tested, -especially in an overcommitted system. - -* Does cleancache work in userspace? It sounds useful for - memory hungry caches like web browsers. (Jamie Lokier) - -No plans yet, though we agree it sounds useful, at least for -apps that bypass the page cache (e.g. O_DIRECT). - -Last updated: Dan Magenheimer, April 13 2011 diff --git a/Documentation/vm/frontswap.rst b/Documentation/vm/frontswap.rst index 1979f430c1c5b4cd6477d159a89efe82c6e65637..feecc5e2447780c2edb7e5c28cf7f5178ff925c2 100644 --- a/Documentation/vm/frontswap.rst +++ b/Documentation/vm/frontswap.rst @@ -8,12 +8,6 @@ Frontswap provides a "transcendent memory" interface for swap pages. In some environments, dramatic performance savings may be obtained because swapped pages are saved in RAM (or a RAM-like device) instead of a swap disk. -(Note, frontswap -- and :ref:`cleancache` (merged at 3.0) -- are the "frontends" -and the only necessary changes to the core kernel for transcendent memory; -all other supporting code -- the "backends" -- is implemented as drivers. -See the LWN.net article `Transcendent memory in a nutshell`_ -for a detailed overview of frontswap and related kernel parts) - .. _Transcendent memory in a nutshell: https://lwn.net/Articles/454795/ Frontswap is so named because it can be thought of as the opposite of @@ -45,12 +39,6 @@ a disk write and, if the data is later read back, a disk read are avoided. If a store returns failure, transcendent memory has rejected the data, and the page can be written to swap as usual. 
-If a backend chooses, frontswap can be configured as a "writethrough cache" by calling frontswap_writethrough(). In this mode, the reduction in swap device writes is lost (and also a non-trivial performance advantage) in order to allow the backend to arbitrarily "reclaim" space used to store frontswap pages to more completely manage its memory usage. - Note that if a page is stored and the page already exists in transcendent memory (a "duplicate" store), either the store succeeds and the data is overwritten, or the store fails AND the page is invalidated. This ensures stale data may @@ -87,11 +75,9 @@ This interface is ideal when data is transformed to a different form and size (such as with compression) or secretly moved (as might be useful for write-balancing for some RAM-like devices). Swap pages (and evicted page-cache pages) are a great use for this kind of slower-than-RAM- -but-much-faster-than-disk "pseudo-RAM device" and the frontswap (and -cleancache) interface to transcendent memory provides a nice way to read -and write -- and indirectly "name" -- the pages. +but-much-faster-than-disk "pseudo-RAM device". -Frontswap -- and cleancache -- with a fairly small impact on the kernel, +Frontswap, with a fairly small impact on the kernel, provides a huge amount of flexibility for more dynamic, flexible RAM utilization in various system configurations: @@ -269,19 +255,6 @@ the old data and ensure that it is no longer accessible. Since the swap subsystem then writes the new data to the read swap device, this is the correct course of action to ensure coherency. -* What is frontswap_shrink for? - -When the (non-frontswap) swap subsystem swaps out a page to a real -swap device, that page is only taking up low-value pre-allocated disk -space. But if frontswap has placed a page in transcendent memory, that -page may be taking up valuable real estate. The frontswap_shrink -routine allows code outside of the swap subsystem to force pages out -of the memory managed by frontswap and back into kernel-addressable memory. -For example, in RAMster, a "suction driver" thread will attempt -to "repatriate" pages sent to a remote machine back to the local machine; -this is driven using the frontswap_shrink mechanism when memory pressure -subsides. - * Why does the frontswap patch create the new include file swapfile.h? The frontswap code depends on some swap-subsystem-internal data diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst index 932440805453e6a5dd897a488e26b91276e65c9a..44365c4574a378f5572c1c4825a9b52746ed3351 100644 --- a/Documentation/vm/index.rst +++ b/Documentation/vm/index.rst @@ -15,7 +15,6 @@ algorithms. If you are looking for advice on simply allocating memory, see the active_mm arch_pgtable_helpers balance - cleancache damon/index free_page_reporting frontswap diff --git a/Documentation/vm/page_table_check.rst b/Documentation/vm/page_table_check.rst index 81f521ff7ea7070f593cd89479430c9ab9eb9d3d..1a09472f10a3c8a1c1b031df75084f46667f2bcc 100644 --- a/Documentation/vm/page_table_check.rst +++ b/Documentation/vm/page_table_check.rst @@ -9,7 +9,7 @@ Page Table Check Introduction ============ -Page table check allows to hardern the kernel by ensuring that some types of +Page table check allows hardening the kernel by ensuring that some types of the memory corruptions are prevented. 
Page table check performs extra verifications at the time when new pages become diff --git a/MAINTAINERS b/MAINTAINERS index 6c08cbe953fed0a6366997d7e90cbc836a150a0f..fca970a46e77af110ed1d592f40567c82334b12d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -190,8 +190,9 @@ M: Johannes Berg L: linux-wireless@vger.kernel.org S: Maintained W: https://wireless.wiki.kernel.org/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git +Q: https://patchwork.kernel.org/project/linux-wireless/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git F: Documentation/driver-api/80211/cfg80211.rst F: Documentation/networking/regulatory.rst F: include/linux/ieee80211.h @@ -1619,6 +1620,7 @@ M: Olof Johansson M: soc@kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained +C: irc://irc.libera.chat/armlinux T: git git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git F: arch/arm/boot/dts/Makefile F: arch/arm64/boot/dts/Makefile @@ -1626,6 +1628,7 @@ F: arch/arm64/boot/dts/Makefile ARM SUB-ARCHITECTURES L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained +C: irc://irc.libera.chat/armlinux T: git git://git.kernel.org/pub/scm/linux/kernel/git/soc/soc.git F: arch/arm/mach-*/ F: arch/arm/plat-*/ @@ -1779,6 +1782,7 @@ F: drivers/irqchip/irq-apple-aic.c F: drivers/mailbox/apple-mailbox.c F: drivers/pinctrl/pinctrl-apple-gpio.c F: drivers/soc/apple/* +F: drivers/watchdog/apple_wdt.c F: include/dt-bindings/interrupt-controller/apple-aic.h F: include/dt-bindings/pinctrl/apple.h F: include/linux/apple-mailbox.h @@ -2569,10 +2573,13 @@ N: rockchip ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES M: Krzysztof Kozlowski +R: Alim Akhtar L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org S: Maintained +C: irc://irc.libera.chat/linux-exynos Q: https://patchwork.kernel.org/project/linux-samsung-soc/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git F: Documentation/arm/samsung/ F: Documentation/devicetree/bindings/arm/samsung/ F: Documentation/devicetree/bindings/power/pd-samsung.yaml @@ -3410,14 +3417,14 @@ M: Yury Norov R: Andy Shevchenko R: Rasmus Villemoes S: Maintained -F: include/asm-generic/bitops/find.h F: include/linux/bitmap.h +F: include/linux/find.h F: lib/bitmap.c F: lib/find_bit.c F: lib/find_bit_benchmark.c F: lib/test_bitmap.c -F: tools/include/asm-generic/bitops/find.h F: tools/include/linux/bitmap.h +F: tools/include/linux/find.h F: tools/lib/bitmap.c F: tools/lib/find_bit.c @@ -4156,9 +4163,8 @@ N: csky K: csky CA8210 IEEE-802.15.4 RADIO DRIVER -M: Harry Morris L: linux-wpan@vger.kernel.org -S: Maintained +S: Orphan W: https://github.com/Cascoda/ca8210-linux.git F: Documentation/devicetree/bindings/net/ieee802154/ca8210.txt F: drivers/net/ieee802154/ca8210.c @@ -4705,13 +4711,6 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/cla F: include/linux/cfi.h F: kernel/cfi.c -CLEANCACHE API -M: Konrad Rzeszutek Wilk -L: linux-kernel@vger.kernel.org -S: Maintained -F: include/linux/cleancache.h -F: mm/cleancache.c - CLK API M: Russell King L: linux-clk@vger.kernel.org @@ -5779,7 +5778,7 @@ F: tools/testing/selftests/dma/ DMA-BUF HEAPS FRAMEWORK M: Sumit Semwal -R: Benjamin Gaignard +R: Benjamin Gaignard R: Liam Mark R: Laura 
Abbott R: Brian Starkey @@ -6509,7 +6508,7 @@ F: Documentation/devicetree/bindings/display/rockchip/ F: drivers/gpu/drm/rockchip/ DRM DRIVERS FOR STI -M: Benjamin Gaignard +M: Alain Volmat L: dri-devel@lists.freedesktop.org S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc @@ -6518,8 +6517,8 @@ F: drivers/gpu/drm/sti DRM DRIVERS FOR STM M: Yannick Fertre +M: Raphael Gallais-Pou M: Philippe Cornu -M: Benjamin Gaignard L: dri-devel@lists.freedesktop.org S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc @@ -7215,8 +7214,10 @@ F: drivers/net/mdio/of_mdio.c F: drivers/net/pcs/ F: drivers/net/phy/ F: include/dt-bindings/net/qca-ar803x.h +F: include/linux/linkmode.h F: include/linux/*mdio*.h F: include/linux/mdio/*.h +F: include/linux/mii.h F: include/linux/of_net.h F: include/linux/phy.h F: include/linux/phy_fixed.h @@ -7580,6 +7581,12 @@ S: Maintained W: http://floatingpoint.sourceforge.net/emulator/index.html F: arch/x86/math-emu/ +FRAMEBUFFER CORE +M: Daniel Vetter +F: drivers/video/fbdev/core/ +S: Odd Fixes +T: git git://anongit.freedesktop.org/drm/drm-misc + FRAMEBUFFER LAYER M: Helge Deller L: linux-fbdev@vger.kernel.org @@ -10884,6 +10891,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git F: drivers/ata/pata_arasan_cf.c F: include/linux/pata_arasan_cf_data.h +LIBATA PATA DRIVERS +R: Sergey Shtylyov +L: linux-ide@vger.kernel.org +F: drivers/ata/ata_*.c +F: drivers/ata/pata_*.c + LIBATA PATA FARADAY FTIDE010 AND GEMINI SATA BRIDGE DRIVERS M: Linus Walleij L: linux-ide@vger.kernel.org @@ -11373,8 +11386,9 @@ M: Johannes Berg L: linux-wireless@vger.kernel.org S: Maintained W: https://wireless.wiki.kernel.org/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git +Q: https://patchwork.kernel.org/project/linux-wireless/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git F: Documentation/networking/mac80211-injection.rst F: Documentation/networking/mac80211_hwsim/mac80211_hwsim.rst F: drivers/net/wireless/mac80211_hwsim.[ch] @@ -12403,7 +12417,7 @@ F: include/uapi/linux/membarrier.h F: kernel/sched/membarrier.c MEMBLOCK -M: Mike Rapoport +M: Mike Rapoport L: linux-mm@kvack.org S: Maintained F: Documentation/core-api/boot-time-mm.rst @@ -13301,8 +13315,8 @@ W: http://www.iptables.org/ W: http://www.nftables.org/ Q: http://patchwork.ozlabs.org/project/netfilter-devel/list/ C: irc://irc.libera.chat/netfilter -T: git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next.git F: include/linux/netfilter* F: include/linux/netfilter/ F: include/net/netfilter/ @@ -13381,9 +13395,10 @@ NETWORKING DRIVERS (WIRELESS) M: Kalle Valo L: linux-wireless@vger.kernel.org S: Maintained -Q: http://patchwork.kernel.org/project/linux-wireless/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers-next.git +W: https://wireless.wiki.kernel.org/ +Q: https://patchwork.kernel.org/project/linux-wireless/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git +T: git 
git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git F: Documentation/devicetree/bindings/net/wireless/ F: drivers/net/wireless/ @@ -13456,7 +13471,11 @@ L: netdev@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git F: arch/x86/net/* +F: include/linux/ip.h +F: include/linux/ipv6* +F: include/net/fib* F: include/net/ip* +F: include/net/route.h F: net/ipv4/ F: net/ipv6/ @@ -13517,10 +13536,6 @@ F: include/net/tls.h F: include/uapi/linux/tls.h F: net/tls/* -NETWORKING [WIRELESS] -L: linux-wireless@vger.kernel.org -Q: http://patchwork.kernel.org/project/linux-wireless/list/ - NETXEN (1/10) GbE SUPPORT M: Manish Chopra M: Rahul Verma @@ -13568,7 +13583,7 @@ F: tools/testing/selftests/nci/ NFS, SUNRPC, AND LOCKD CLIENTS M: Trond Myklebust -M: Anna Schumaker +M: Anna Schumaker L: linux-nfs@vger.kernel.org S: Maintained W: http://client.linux-nfs.org @@ -14386,6 +14401,7 @@ M: Rob Herring M: Frank Rowand L: devicetree@vger.kernel.org S: Maintained +C: irc://irc.libera.chat/devicetree W: http://www.devicetree.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git F: Documentation/ABI/testing/sysfs-firmware-ofw @@ -14397,6 +14413,7 @@ OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS M: Rob Herring L: devicetree@vger.kernel.org S: Maintained +C: irc://irc.libera.chat/devicetree Q: http://patchwork.ozlabs.org/project/devicetree-bindings/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux.git F: Documentation/devicetree/ @@ -15287,9 +15304,11 @@ PIN CONTROLLER - SAMSUNG M: Tomasz Figa M: Krzysztof Kozlowski M: Sylwester Nawrocki +R: Alim Akhtar L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org S: Maintained +C: irc://irc.libera.chat/linux-exynos Q: https://patchwork.kernel.org/project/linux-samsung-soc/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/samsung.git F: Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt @@ -16471,6 +16490,14 @@ F: Documentation/devicetree/bindings/i2c/renesas,rmobile-iic.yaml F: drivers/i2c/busses/i2c-rcar.c F: drivers/i2c/busses/i2c-sh_mobile.c +RENESAS R-CAR SATA DRIVER +R: Sergey Shtylyov +S: Supported +L: linux-ide@vger.kernel.org +L: linux-renesas-soc@vger.kernel.org +F: Documentation/devicetree/bindings/ata/renesas,rcar-sata.yaml +F: drivers/ata/sata_rcar.c + RENESAS R-CAR THERMAL DRIVERS M: Niklas Söderlund L: linux-renesas-soc@vger.kernel.org @@ -16539,8 +16566,9 @@ M: Johannes Berg L: linux-wireless@vger.kernel.org S: Maintained W: https://wireless.wiki.kernel.org/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git +Q: https://patchwork.kernel.org/project/linux-wireless/list/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/wireless/wireless-next.git F: Documentation/ABI/stable/sysfs-class-rfkill F: Documentation/driver-api/rfkill.rst F: include/linux/rfkill.h @@ -16805,8 +16833,8 @@ F: drivers/video/fbdev/savage/ S390 M: Heiko Carstens M: Vasily Gorbik -M: Christian Borntraeger -R: Alexander Gordeev +M: Alexander Gordeev +R: Christian Borntraeger R: Sven Schnelle L: linux-s390@vger.kernel.org S: Supported @@ -17077,6 +17105,7 @@ SAMSUNG SOC CLOCK DRIVERS M: Sylwester Nawrocki M: Tomasz Figa M: Chanwoo Choi +R: Alim Akhtar L: linux-samsung-soc@vger.kernel.org S: Supported T: git 
git://git.kernel.org/pub/scm/linux/kernel/git/snawrocki/clk.git @@ -17713,6 +17742,21 @@ S: Maintained W: http://www.winischhofer.at/linuxsisusbvga.shtml F: drivers/usb/misc/sisusbvga/ +SL28 CPLD MFD DRIVER +M: Michael Walle +S: Maintained +F: Documentation/devicetree/bindings/gpio/kontron,sl28cpld-gpio.yaml +F: Documentation/devicetree/bindings/hwmon/kontron,sl28cpld-hwmon.yaml +F: Documentation/devicetree/bindings/interrupt-controller/kontron,sl28cpld-intc.yaml +F: Documentation/devicetree/bindings/mfd/kontron,sl28cpld.yaml +F: Documentation/devicetree/bindings/pwm/kontron,sl28cpld-pwm.yaml +F: Documentation/devicetree/bindings/watchdog/kontron,sl28cpld-wdt.yaml +F: drivers/gpio/gpio-sl28cpld.c +F: drivers/hwmon/sl28cpld-hwmon.c +F: drivers/irqchip/irq-sl28cpld.c +F: drivers/pwm/pwm-sl28cpld.c +F: drivers/watchdog/sl28cpld_wdt.c + SLAB ALLOCATOR M: Christoph Lameter M: Pekka Enberg @@ -18429,7 +18473,7 @@ F: Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt F: sound/soc/sti/ STI CEC DRIVER -M: Benjamin Gaignard +M: Alain Volmat S: Maintained F: Documentation/devicetree/bindings/media/stih-cec.txt F: drivers/media/cec/platform/sti/ @@ -19583,6 +19627,14 @@ F: Documentation/trace/timerlat-tracer.rst F: Documentation/trace/hwlat_detector.rst F: arch/*/kernel/trace.c +Real-time Linux Analysis (RTLA) tools +M: Daniel Bristot de Oliveira +M: Steven Rostedt +L: linux-trace-devel@vger.kernel.org +S: Maintained +F: Documentation/tools/rtla/ +F: tools/tracing/rtla/ + TRADITIONAL CHINESE DOCUMENTATION M: Hu Haowen L: linux-doc-tw-discuss@lists.sourceforge.net diff --git a/Makefile b/Makefile index 3f07f0f04475c31fb5cd4088afade82a276c4ba8..51e142f760f7c71b226f9c25bf6da21844fcf9da 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 -PATCHLEVEL = 16 +PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -NAME = Gobble Gobble +EXTRAVERSION = -rc4 +NAME = Superb Owl # *DOCUMENTATION* # To see a list of typical targets execute "make help" @@ -778,7 +778,7 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG) := -fstack-protector-strong KBUILD_CFLAGS += $(stackp-flags-y) KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror -KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH:"%"=%) +KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH) ifdef CONFIG_CC_IS_CLANG KBUILD_CPPFLAGS += -Qunused-arguments diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h index 5adca78830b5e9a3349e5fb410188b0eb98e3bd3..e1d8483a45f2e448bccbacb6dfaa8124d8864b21 100644 --- a/arch/alpha/include/asm/bitops.h +++ b/arch/alpha/include/asm/bitops.h @@ -430,8 +430,6 @@ static inline unsigned int __arch_hweight8(unsigned int w) #endif /* __KERNEL__ */ -#include - #ifdef __KERNEL__ /* diff --git a/arch/alpha/kernel/srm_env.c b/arch/alpha/kernel/srm_env.c index 528d2be58182983b59c68ac76d5b362119519ec8..217b4dca51dd9fb8af068afbea941b587397155f 100644 --- a/arch/alpha/kernel/srm_env.c +++ b/arch/alpha/kernel/srm_env.c @@ -83,14 +83,14 @@ static int srm_env_proc_show(struct seq_file *m, void *v) static int srm_env_proc_open(struct inode *inode, struct file *file) { - return single_open(file, srm_env_proc_show, PDE_DATA(inode)); + return single_open(file, srm_env_proc_show, pde_data(inode)); } static ssize_t srm_env_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { int res; - unsigned long id = (unsigned long)PDE_DATA(file_inode(file)); + unsigned long id = (unsigned long)pde_data(file_inode(file)); char *buf = (char 
*) __get_free_page(GFP_USER); unsigned long ret1, ret2; diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index f74d9860a4420c6e9ecf1f611d232c028232468e..3c2a4753d09b62aa16f2ca4180608c42ef0a2457 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -20,7 +20,6 @@ config ARC select COMMON_CLK select DMA_DIRECT_REMAP select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC) - select GENERIC_FIND_FIRST_BIT # for now, we don't need GENERIC_IRQ_PROBE, CONFIG_GENERIC_IRQ_CHIP select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h index a7daaf64ae34410fdba74176665a561ef20984a8..bdb7e190a294e7c4789e00d681bfaa541c37541f 100644 --- a/arch/arc/include/asm/bitops.h +++ b/arch/arc/include/asm/bitops.h @@ -189,7 +189,6 @@ static inline __attribute__ ((const)) unsigned long __ffs(unsigned long x) #include #include -#include #include #include diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fabe39169b120ca6c11fb3037b2c134f0faa7b5c..4c97cb40eebb632eafc036ee6d4f8c4dce570b35 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -83,6 +83,7 @@ config ARM select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32 select HAVE_CONTEXT_TRACKING select HAVE_C_RECORDMCOUNT + select HAVE_BUILDTIME_MCOUNT_SORT select HAVE_DEBUG_KMEMLEAK if !XIP_KERNEL select HAVE_DMA_CONTIGUOUS if MMU select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 235ad559acb2869481615b78abe4e933630bfbb0..e41eca79c9504a51ef3fa5b643af128e3db92f06 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -806,6 +806,7 @@ dtb-$(CONFIG_ARCH_OMAP3) += \ logicpd-som-lv-37xx-devkit.dtb \ omap3430-sdp.dtb \ omap3-beagle.dtb \ + omap3-beagle-ab4.dtb \ omap3-beagle-xm.dtb \ omap3-beagle-xm-ab.dtb \ omap3-cm-t3517.dtb \ diff --git a/arch/arm/boot/dts/am335x-wega.dtsi b/arch/arm/boot/dts/am335x-wega.dtsi index 673159d93a6adef51830c53067c539cd531626e4..f957fea8208e134e33358dbcc7950003d7b1b414 100644 --- a/arch/arm/boot/dts/am335x-wega.dtsi +++ b/arch/arm/boot/dts/am335x-wega.dtsi @@ -55,7 +55,7 @@ 2 1 0 0 /* # 0: INACTIVE, 1: TX, 2: RX */ >; tx-num-evt = <16>; - rt-num-evt = <16>; + rx-num-evt = <16>; status = "okay"; }; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 6b485cbed8d50147ad387ad1fed7e7e28835eb19..42bff117656cf2fd4d795e9c8cd724d2aea4a55d 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -160,7 +160,7 @@ target-module@48210000 { compatible = "ti,sysc-omap4-simple", "ti,sysc"; power-domains = <&prm_mpu>; - clocks = <&mpu_clkctrl DRA7_MPU_CLKCTRL 0>; + clocks = <&mpu_clkctrl DRA7_MPU_MPU_CLKCTRL 0>; clock-names = "fck"; #address-cells = <1>; #size-cells = <1>; @@ -875,10 +875,10 @@ <0x58000014 4>; reg-names = "rev", "syss"; ti,syss-mask = <1>; - clocks = <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 0>, - <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 9>, - <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 10>, - <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 11>; + clocks = <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 0>, + <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 9>, + <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 10>, + <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 11>; clock-names = "fck", "hdmi_clk", "sys_clk", "tv_clk"; #address-cells = <1>; #size-cells = <1>; @@ -912,7 +912,7 @@ SYSC_OMAP2_SOFTRESET | SYSC_OMAP2_AUTOIDLE)>; ti,syss-mask = <1>; - clocks = <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 8>; + clocks = <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 8>; clock-names = "fck"; 
#address-cells = <1>; #size-cells = <1>; @@ -939,8 +939,8 @@ , ; ti,sysc-mask = <(SYSC_OMAP4_SOFTRESET)>; - clocks = <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 9>, - <&dss_clkctrl DRA7_DSS_CORE_CLKCTRL 8>; + clocks = <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 9>, + <&dss_clkctrl DRA7_DSS_DSS_CORE_CLKCTRL 8>; clock-names = "fck", "dss_clk"; #address-cells = <1>; #size-cells = <1>; @@ -979,7 +979,7 @@ compatible = "vivante,gc"; reg = <0x0 0x700>; interrupts = ; - clocks = <&dss_clkctrl DRA7_BB2D_CLKCTRL 0>; + clocks = <&dss_clkctrl DRA7_DSS_BB2D_CLKCTRL 0>; clock-names = "core"; }; }; @@ -1333,7 +1333,7 @@ ti,no-reset-on-init; ti,no-idle; timer@0 { - assigned-clocks = <&wkupaon_clkctrl DRA7_TIMER1_CLKCTRL 24>; + assigned-clocks = <&wkupaon_clkctrl DRA7_WKUPAON_TIMER1_CLKCTRL 24>; assigned-clock-parents = <&sys_32k_ck>; }; }; diff --git a/arch/arm/boot/dts/imx23-evk.dts b/arch/arm/boot/dts/imx23-evk.dts index 8cbaf1c8117456a11125de5f1f9986e1c9078e7d..3b609d987d88300f68f1402057dfc9b47b8bb146 100644 --- a/arch/arm/boot/dts/imx23-evk.dts +++ b/arch/arm/boot/dts/imx23-evk.dts @@ -79,7 +79,6 @@ MX23_PAD_LCD_RESET__GPIO_1_18 MX23_PAD_PWM3__GPIO_1_29 MX23_PAD_PWM4__GPIO_1_30 - MX23_PAD_SSP1_DETECT__SSP1_DETECT >; fsl,drive-strength = ; fsl,voltage = ; diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi index d07d8f83456d241a288be60ce729579b9dded12f..ccfa8e320be62e26eea26a970eca940e440e2752 100644 --- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi +++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi @@ -5,6 +5,8 @@ * Author: Fabio Estevam */ +#include + / { aliases { backlight = &backlight; @@ -226,6 +228,7 @@ MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059 MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059 MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059 + MX6QDL_PAD_SD3_DAT5__GPIO7_IO00 0x1b0b0 >; }; @@ -304,7 +307,7 @@ &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - non-removable; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx7ulp.dtsi b/arch/arm/boot/dts/imx7ulp.dtsi index b7ea37ad4e55c45f5615766f3fa30bf7d9eab4a4..bcec98b96411437c831d9e91ddaf2f63d657ea9e 100644 --- a/arch/arm/boot/dts/imx7ulp.dtsi +++ b/arch/arm/boot/dts/imx7ulp.dtsi @@ -259,7 +259,7 @@ interrupts = ; clocks = <&pcc2 IMX7ULP_CLK_WDG1>; assigned-clocks = <&pcc2 IMX7ULP_CLK_WDG1>; - assigned-clocks-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>; + assigned-clock-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>; timeout-sec = <40>; }; diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi index 3be7cba603d5ac4a78881d3c0a455850dccd27db..26eaba3fa96f3df868c07ec400d61807646441b2 100644 --- a/arch/arm/boot/dts/meson.dtsi +++ b/arch/arm/boot/dts/meson.dtsi @@ -59,7 +59,7 @@ }; uart_A: serial@84c0 { - compatible = "amlogic,meson6-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart"; reg = <0x84c0 0x18>; interrupts = ; fifo-size = <128>; @@ -67,7 +67,7 @@ }; uart_B: serial@84dc { - compatible = "amlogic,meson6-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart"; reg = <0x84dc 0x18>; interrupts = ; status = "disabled"; @@ -105,7 +105,7 @@ }; uart_C: serial@8700 { - compatible = "amlogic,meson6-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart"; reg = <0x8700 0x18>; interrupts = ; status = "disabled"; @@ -228,7 +228,7 @@ }; uart_AO: serial@4c0 { - compatible = "amlogic,meson6-uart", "amlogic,meson-ao-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart", "amlogic,meson-ao-uart"; reg = <0x4c0 0x18>; interrupts = ; status = "disabled"; 
diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index f80ddc98d3a2b20697c7daeae372c72d074cf96f..9997a5d0333a3f7339136dfe4eeecb9bfe148a55 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -736,27 +736,27 @@ }; &uart_AO { - compatible = "amlogic,meson8-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_CLK81>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8-uart", "amlogic,meson-ao-uart"; + clocks = <&xtal>, <&clkc CLKID_CLK81>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_A { - compatible = "amlogic,meson8-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART0>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_B { - compatible = "amlogic,meson8-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART1>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_C { - compatible = "amlogic,meson8-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART2>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &usb0 { diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi index b49b7cbaed4eed028221f1858be95227b7ebaf3c..94f1c03deccefa2e425110a5b2941db557b4a2d0 100644 --- a/arch/arm/boot/dts/meson8b.dtsi +++ b/arch/arm/boot/dts/meson8b.dtsi @@ -724,27 +724,27 @@ }; &uart_AO { - compatible = "amlogic,meson8b-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_CLK81>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8b-uart", "amlogic,meson-ao-uart"; + clocks = <&xtal>, <&clkc CLKID_CLK81>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_A { - compatible = "amlogic,meson8b-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART0>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8b-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_B { - compatible = "amlogic,meson8b-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART1>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8b-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_C { - compatible = "amlogic,meson8b-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART2>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8b-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &usb0 { diff --git a/arch/arm/boot/dts/omap3-beagle-ab4.dts b/arch/arm/boot/dts/omap3-beagle-ab4.dts new file mode 100644 index 0000000000000000000000000000000000000000..990ff2d8468684469dbe5704ebe6a75ef32bd66c --- /dev/null +++ b/arch/arm/boot/dts/omap3-beagle-ab4.dts @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0-only +/dts-v1/; + +#include "omap3-beagle.dts" + +/ { + model = "TI OMAP3 BeagleBoard A to B4"; + 
compatible = "ti,omap3-beagle-ab4", "ti,omap3-beagle", "ti,omap3430", "ti,omap3"; +}; + +/* + * Workaround for capacitor C70 issue, see "Boards revision A and < B5" + * section at https://elinux.org/BeagleBoard_Community + */ + +/* Unusable as clocksource because of unreliable oscillator */ +&counter32k { + status = "disabled"; +}; + +/* Unusable as clockevent because of unreliable oscillator, allow to idle */ +&timer1_target { + /delete-property/ti,no-reset-on-init; + /delete-property/ti,no-idle; + timer@0 { + /delete-property/ti,timer-alwon; + }; +}; + +/* Preferred always-on timer for clocksource */ +&timer12_target { + ti,no-reset-on-init; + ti,no-idle; + timer@0 { + /* Always clocked by secure_32k_fck */ + }; +}; + +/* Preferred timer for clockevent */ +&timer2_target { + ti,no-reset-on-init; + ti,no-idle; + timer@0 { + assigned-clocks = <&gpt2_fck>; + assigned-clock-parents = <&sys_ck>; + }; +}; diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts index f9f34b8458e91c871de4a2e2d1093a306ba99cfd..0548b391334fdbf992beb9a3ae41c3edb7b741b4 100644 --- a/arch/arm/boot/dts/omap3-beagle.dts +++ b/arch/arm/boot/dts/omap3-beagle.dts @@ -304,39 +304,6 @@ phys = <0 &hsusb2_phy>; }; -/* Unusable as clocksource because of unreliable oscillator */ -&counter32k { - status = "disabled"; -}; - -/* Unusable as clockevent because if unreliable oscillator, allow to idle */ -&timer1_target { - /delete-property/ti,no-reset-on-init; - /delete-property/ti,no-idle; - timer@0 { - /delete-property/ti,timer-alwon; - }; -}; - -/* Preferred always-on timer for clocksource */ -&timer12_target { - ti,no-reset-on-init; - ti,no-idle; - timer@0 { - /* Always clocked by secure_32k_fck */ - }; -}; - -/* Preferred timer for clockevent */ -&timer2_target { - ti,no-reset-on-init; - ti,no-idle; - timer@0 { - assigned-clocks = <&gpt2_fck>; - assigned-clock-parents = <&sys_ck>; - }; -}; - &twl_gpio { ti,use-leds; /* pullups: BIT(1) */ diff --git a/arch/arm/boot/dts/spear320-hmi.dts b/arch/arm/boot/dts/spear320-hmi.dts index 367ba48aac3e56b1f6e6da4383f685b67c961077..b587e4ec11e5de305139bcf7e970211b3753868f 100644 --- a/arch/arm/boot/dts/spear320-hmi.dts +++ b/arch/arm/boot/dts/spear320-hmi.dts @@ -235,7 +235,6 @@ #address-cells = <1>; #size-cells = <0>; reg = <0x41>; - irq-over-gpio; irq-gpios = <&gpiopinctrl 29 0x4>; id = <0>; blocks = <0x5>; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts b/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts index 580ca497f3121f8eb9fe980dbc349600eaf11cee..f8c5899fbdba0554476bc79af16eb72d45349016 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts @@ -185,10 +185,6 @@ cap-sd-highspeed; cap-mmc-highspeed; /* All direction control is used */ - st,sig-dir-cmd; - st,sig-dir-dat0; - st,sig-dir-dat2; - st,sig-dir-dat31; st,sig-pin-fbclk; full-pwr-cycle; vmmc-supply = <&ab8500_ldo_aux3_reg>; diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig index 383c632eba7bd7b17597fd680fa1cb7a9805e9ef..a9ed79b7f8716cfee6f1bc453d6e8ddc10d820ba 100644 --- a/arch/arm/configs/bcm2835_defconfig +++ b/arch/arm/configs/bcm2835_defconfig @@ -31,7 +31,6 @@ CONFIG_ARCH_BCM2835=y CONFIG_PREEMPT_VOLUNTARY=y CONFIG_AEABI=y CONFIG_KSM=y -CONFIG_CLEANCACHE=y CONFIG_CMA=y CONFIG_SECCOMP=y CONFIG_KEXEC=y diff --git a/arch/arm/configs/qcom_defconfig b/arch/arm/configs/qcom_defconfig index 0daa9c0d298e7f3a7cbba066c846bbffb9272eb3..9981566f20961816c99bbcddcb1d64dfa424efba 100644 --- 
a/arch/arm/configs/qcom_defconfig +++ b/arch/arm/configs/qcom_defconfig @@ -27,7 +27,6 @@ CONFIG_PCIE_QCOM=y CONFIG_SMP=y CONFIG_PREEMPT=y CONFIG_HIGHMEM=y -CONFIG_CLEANCACHE=y CONFIG_ARM_APPENDED_DTB=y CONFIG_ARM_ATAG_DTB_COMPAT=y CONFIG_CPU_IDLE=y diff --git a/arch/arm/crypto/blake2s-shash.c b/arch/arm/crypto/blake2s-shash.c index 17c1c3bfe2f505f0eac3af720024ea608b26afc7..763c73beea2d046d10de5c22ec6195ab3591661e 100644 --- a/arch/arm/crypto/blake2s-shash.c +++ b/arch/arm/crypto/blake2s-shash.c @@ -13,12 +13,12 @@ static int crypto_blake2s_update_arm(struct shash_desc *desc, const u8 *in, unsigned int inlen) { - return crypto_blake2s_update(desc, in, inlen, blake2s_compress); + return crypto_blake2s_update(desc, in, inlen, false); } static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out) { - return crypto_blake2s_final(desc, out, blake2s_compress); + return crypto_blake2s_final(desc, out, false); } #define BLAKE2S_ALG(name, driver_name, digest_size) \ diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 7d23d4bb2168bacb4f52dda176b7a5bc5402b5f0..6fe67963ba5a0885e68165e5ec591fed039dbcb9 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -288,6 +288,7 @@ */ #define ALT_UP(instr...) \ .pushsection ".alt.smp.init", "a" ;\ + .align 2 ;\ .long 9998b - . ;\ 9997: instr ;\ .if . - 9997b == 2 ;\ @@ -299,6 +300,7 @@ .popsection #define ALT_UP_B(label) \ .pushsection ".alt.smp.init", "a" ;\ + .align 2 ;\ .long 9998b - . ;\ W(b) . + (label - 9998b) ;\ .popsection diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h index c92e42a5c8f75d2c03e3fd03cceff245e4da0bc0..8e94fe7ab5ebce19104948e7c4a3ad9739ab49ee 100644 --- a/arch/arm/include/asm/bitops.h +++ b/arch/arm/include/asm/bitops.h @@ -264,7 +264,6 @@ static inline int find_next_bit_le(const void *p, int size, int offset) #endif -#include #include /* diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index 6af68edfa53abc7a72789f23eb7f90efb8a6b0b5..bdc35c0e8dfb9482ff20759d5373c15cbc95d86b 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -96,6 +96,7 @@ unsigned long __get_wchan(struct task_struct *p); #define __ALT_SMP_ASM(smp, up) \ "9998: " smp "\n" \ " .pushsection \".alt.smp.init\", \"a\"\n" \ + " .align 2\n" \ " .long 9998b - .\n" \ " " up "\n" \ " .popsection\n" diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 36fbc332925263a6c72277ea19a5864bf78d2a88..32dbfd81f42a48a27c2927634d46a4d30609d7e1 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -497,7 +498,10 @@ do { \ } \ default: __err = __get_user_bad(); break; \ } \ - *(type *)(dst) = __val; \ + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) \ + put_unaligned(__val, (type *)(dst)); \ + else \ + *(type *)(dst) = __val; /* aligned by caller */ \ if (__err) \ goto err_label; \ } while (0) @@ -507,7 +511,9 @@ do { \ const type *__pk_ptr = (dst); \ unsigned long __dst = (unsigned long)__pk_ptr; \ int __err = 0; \ - type __val = *(type *)src; \ + type __val = IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \ + ? 
get_unaligned((type *)(src)) \ + : *(type *)(src); /* aligned by caller */ \ switch (sizeof(type)) { \ case 1: __put_user_asm_byte(__val, __dst, __err, ""); break; \ case 2: __put_user_asm_half(__val, __dst, __err, ""); break; \ diff --git a/arch/arm/kernel/atags_proc.c b/arch/arm/kernel/atags_proc.c index 3c2faf2bd124e8f6383c247cf0ddff1f72a32dd0..3ec2afe78423278defddb9b89249f6e313681d51 100644 --- a/arch/arm/kernel/atags_proc.c +++ b/arch/arm/kernel/atags_proc.c @@ -13,7 +13,7 @@ struct buffer { static ssize_t atags_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct buffer *b = PDE_DATA(file_inode(file)); + struct buffer *b = pde_data(file_inode(file)); return simple_read_from_buffer(buf, count, ppos, b->data, b->size); } diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 6daaa645ae5d98cd3dc51973e5d6ffa037969612..21413a9b7b6c6281f55a1c136547df48dd875673 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -263,9 +263,9 @@ static int __init omapdss_init_of(void) } r = of_platform_populate(node, NULL, NULL, &pdev->dev); + put_device(&pdev->dev); if (r) { pr_err("Unable to populate DSS submodule devices\n"); - put_device(&pdev->dev); return r; } diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index ccb0e3732c0dc15d72b133e57b98fe3cdf86aad5..31d1a21f60416db586ad28dbd3d829cc1a0ce860 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -752,8 +752,10 @@ static int __init _init_clkctrl_providers(void) for_each_matching_node(np, ti_clkctrl_match_table) { ret = _setup_clkctrl_provider(np); - if (ret) + if (ret) { + of_node_put(np); break; + } } return ret; diff --git a/arch/arm/mach-socfpga/Kconfig b/arch/arm/mach-socfpga/Kconfig index 43ddec677c0b3422390b7700fa1fc21fd6efd764..594edf9bbea44383486678a9f868f98cde2bb28f 100644 --- a/arch/arm/mach-socfpga/Kconfig +++ b/arch/arm/mach-socfpga/Kconfig @@ -2,6 +2,7 @@ menuconfig ARCH_INTEL_SOCFPGA bool "Altera SOCFPGA family" depends on ARCH_MULTI_V7 + select ARCH_HAS_RESET_CONTROLLER select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA select ARM_GIC @@ -18,6 +19,7 @@ menuconfig ARCH_INTEL_SOCFPGA select PL310_ERRATA_727915 select PL310_ERRATA_753970 if PL310 select PL310_ERRATA_769419 + select RESET_CONTROLLER if ARCH_INTEL_SOCFPGA config SOCFPGA_SUSPEND diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index adbb3817d0be74ccbb4eb59b59a75aa0a780137d..6f499559d19362885c289d38c3a854a33f10c2bb 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -1005,7 +1005,7 @@ static int __init noalign_setup(char *__unused) __setup("noalign", noalign_setup); /* - * This needs to be done after sysctl_init, otherwise sys/ will be + * This needs to be done after sysctl_init_bases(), otherwise sys/ will be * overwritten. Actually, this shouldn't be in sys/ at all since * it isn't a sysctl, and it doesn't contain sysctl information. * We now locate it in /proc/cpu/alignment instead. 
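As an aside on the PDE_DATA() to pde_data() conversions above (srm_env.c and atags_proc.c): the rename is mechanical, the accessor for a proc entry's private data is now a lowercase function and the calling convention is unchanged. The following is a minimal sketch only, with hypothetical names (demo_show, demo_open, demo_proc_ops) rather than code from this series:

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

/* single_open() stashes its third argument in m->private. */
static int demo_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%s\n", (const char *)m->private);
	return 0;
}

static int demo_open(struct inode *inode, struct file *file)
{
	/* was: single_open(file, demo_show, PDE_DATA(inode)); */
	return single_open(file, demo_show, pde_data(inode));
}

static const struct proc_ops demo_proc_ops = {
	.proc_open	= demo_open,
	.proc_read	= seq_read,
	.proc_lseek	= seq_lseek,
	.proc_release	= single_release,
};

/* Registration attaches the private data that pde_data() later returns:
 * proc_create_data("demo", 0444, NULL, &demo_proc_ops, "hello");
 */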
diff --git a/arch/arm/probes/kprobes/Makefile b/arch/arm/probes/kprobes/Makefile index 14db56f49f0a3fb48e732b86eee0da3fab1306cf..6159010dac4a6d2ea8cbdb66766400d2b6f0638f 100644 --- a/arch/arm/probes/kprobes/Makefile +++ b/arch/arm/probes/kprobes/Makefile @@ -1,4 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 +KASAN_SANITIZE_actions-common.o := n +KASAN_SANITIZE_actions-arm.o := n +KASAN_SANITIZE_actions-thumb.o := n obj-$(CONFIG_KPROBES) += core.o actions-common.o checkers-common.o obj-$(CONFIG_ARM_KPROBES_TEST) += test-kprobes.o test-kprobes-objs := test-core.o diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index dc10d26cb432e8c6c7186e89834c3ffd62a0e6c3..09b885cc4db531fd18debbb2d3cd43e5aa5cdd58 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -120,7 +120,6 @@ config ARM64 select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP - select GENERIC_FIND_FIRST_BIT select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_IPI select GENERIC_IRQ_PROBE @@ -671,15 +670,42 @@ config ARM64_ERRATUM_1508412 config ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE bool +config ARM64_ERRATUM_2051678 + bool "Cortex-A510: 2051678: disable Hardware Update of the page table dirty bit" + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 2051678. + Affected Cortex-A510 cores might not respect the ordering rules for + hardware update of the page table's dirty bit. The workaround + is to not enable the feature on affected CPUs. + + If unsure, say Y. + +config ARM64_ERRATUM_2077057 + bool "Cortex-A510: 2077057: workaround software-step corrupting SPSR_EL2" + help + This option adds the workaround for ARM Cortex-A510 erratum 2077057. + Affected Cortex-A510 cores may corrupt SPSR_EL2 when a step exception is + expected, but a Pointer Authentication trap is taken instead. The + erratum causes SPSR_EL1 to be copied to SPSR_EL2, which could allow + EL1 to cause a return to EL2 with a guest-controlled ELR_EL2. + + This can only happen when EL2 is stepping EL1. + + When these conditions occur, the SPSR_EL2 value is unchanged from the + previous guest entry, and can be restored from the in-memory copy. + + If unsure, say Y. + config ARM64_ERRATUM_2119858 - bool "Cortex-A710: 2119858: workaround TRBE overwriting trace data in FILL mode" + bool "Cortex-A710/X2: 2119858: workaround TRBE overwriting trace data in FILL mode" default y depends on CORESIGHT_TRBE select ARM64_WORKAROUND_TRBE_OVERWRITE_FILL_MODE help - This option adds the workaround for ARM Cortex-A710 erratum 2119858. + This option adds the workaround for ARM Cortex-A710/X2 erratum 2119858. - Affected Cortex-A710 cores could overwrite up to 3 cache lines of trace + Affected Cortex-A710/X2 cores could overwrite up to 3 cache lines of trace data at the base of the buffer (pointed to by TRBASER_EL1) in FILL mode in the event of a WRAP event. @@ -762,14 +788,14 @@ config ARM64_ERRATUM_2253138 If unsure, say Y. config ARM64_ERRATUM_2224489 - bool "Cortex-A710: 2224489: workaround TRBE writing to address out-of-range" + bool "Cortex-A710/X2: 2224489: workaround TRBE writing to address out-of-range" depends on CORESIGHT_TRBE default y select ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE help - This option adds the workaround for ARM Cortex-A710 erratum 2224489. + This option adds the workaround for ARM Cortex-A710/X2 erratum 2224489. 
- Affected Cortex-A710 cores might write to an out-of-range address, not reserved + Affected Cortex-A710/X2 cores might write to an out-of-range address, not reserved for TRBE. Under some conditions, the TRBE might generate a write to the next virtually addressed page following the last page of the TRBE address space (i.e., the TRBLIMITR_EL1.LIMIT), instead of wrapping around to the base. @@ -779,6 +805,65 @@ config ARM64_ERRATUM_2224489 If unsure, say Y. +config ARM64_ERRATUM_2064142 + bool "Cortex-A510: 2064142: workaround TRBE register writes while disabled" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 2064142. + + Affected Cortex-A510 cores might fail to write into system registers after the + TRBE has been disabled. Under some conditions, after the TRBE has been disabled, + writes into the TRBE registers TRBLIMITR_EL1, TRBPTR_EL1, TRBBASER_EL1, TRBSR_EL1, + and TRBTRG_EL1 will be ignored and will not take effect. + + Work around this in the driver by executing TSB CSYNC and DSB after collection + is stopped and before performing a system register write to one of the affected + registers. + + If unsure, say Y. + +config ARM64_ERRATUM_2038923 + bool "Cortex-A510: 2038923: workaround TRBE corruption with enable" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 2038923. + + Affected Cortex-A510 cores might cause an inconsistent view on whether trace is + prohibited within the CPU. As a result, the trace buffer or trace buffer state + might be corrupted. This happens after the TRBE buffer has been enabled by setting + TRBLIMITR_EL1.E, followed by just a single context synchronization event before + execution changes from a context in which trace is prohibited to one where it + isn't, or vice versa. Under these conditions, the view of whether trace + is prohibited is inconsistent between parts of the CPU, and the trace buffer or + the trace buffer state might be corrupted. + + Work around this in the driver by preventing an inconsistent view of whether the + trace is prohibited or not based on TRBLIMITR_EL1.E by immediately following a + change to TRBLIMITR_EL1.E with at least one ISB instruction before an ERET, or + two ISB instructions if no ERET is to take place. + + If unsure, say Y. + +config ARM64_ERRATUM_1902691 + bool "Cortex-A510: 1902691: workaround TRBE trace corruption" + depends on COMPILE_TEST # Until the CoreSight TRBE driver changes are in + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 1902691. + + Affected Cortex-A510 cores might corrupt trace data when it is written into + memory. Effectively the TRBE is broken and hence cannot be used to capture + trace data. + + Work around this problem in the driver by preventing TRBE initialization on + affected CPUs. The firmware must have disabled access to the TRBE for the kernel + on such implementations. This will cover the kernel for any firmware that doesn't + do this already. + + If unsure, say Y. 
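To make the barrier ordering described by the TRBE erratum help texts above concrete, here is a hedged sketch. It is not the pending CoreSight TRBE driver change (the "depends on COMPILE_TEST" comments note that change is not merged yet); trbe_stop_and_drain() is a hypothetical name, and the body simply strings together the documented workarounds using the kernel's existing isb(), tsb_csync() and dsb() primitives:

/* Hypothetical helper, for illustration only: stop TRBE collection and
 * order the subsequent TRBE register writes as the erratum texts describe.
 */
static void trbe_stop_and_drain(void)
{
	write_sysreg_s(0, SYS_TRBLIMITR_EL1);	/* clear TRBLIMITR_EL1.E */
	isb();		/* erratum 2038923: ISB right after the E bit changes */
	tsb_csync();	/* erratum 2064142: TSB CSYNC once collection stops... */
	dsb(nsh);	/* ...and DSB before writing the affected registers */
	write_sysreg_s(0, SYS_TRBSR_EL1);	/* this write now takes effect */
}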
+ config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 7d5d588001708856f3b15fcc7419a84bcb7ea9b0..21697449d762fa990873cbb5fd1f33365b8e3862 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -309,9 +309,6 @@ config ARCH_VISCONTI help This enables support for Toshiba Visconti SoCs Family. -config ARCH_VULCAN - def_bool n - config ARCH_XGENE bool "AppliedMicro X-Gene SOC Family" help diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index 517519e6e87f147a2b613d793e6ad9e851870452..f84d4b489e0bad31810364804e8f076b98757d6a 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -107,6 +107,12 @@ no-map; }; + /* 32 MiB reserved for ARM Trusted Firmware (BL32) */ + secmon_reserved_bl32: secmon@5300000 { + reg = <0x0 0x05300000 0x0 0x2000000>; + no-map; + }; + linux,cma { compatible = "shared-dma-pool"; reusable; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts index d8838dde0f0f456b9816cb4981956fb997ee2f5e..4fb31c2ba31c44f930054a5307ef4a3c2c2b9a18 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts @@ -157,14 +157,6 @@ regulator-always-on; }; - reserved-memory { - /* TEE Reserved Memory */ - bl32_reserved: bl32@5000000 { - reg = <0x0 0x05300000 0x0 0x2000000>; - no-map; - }; - }; - sdio_pwrseq: sdio-pwrseq { compatible = "mmc-pwrseq-simple"; reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi index 3e968b24419188e92437f4ea0f013cbece12e6a3..fd3fa82e4c3305366162c8acc02c7cae7c210de3 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi @@ -17,7 +17,7 @@ rtc1 = &vrtc; }; - dioo2133: audio-amplifier-0 { + dio2133: audio-amplifier-0 { compatible = "simple-audio-amplifier"; enable-gpios = <&gpio_ao GPIOAO_2 GPIO_ACTIVE_HIGH>; VCC-supply = <&vcc_5v>; @@ -219,7 +219,7 @@ audio-widgets = "Line", "Lineout"; audio-aux-devs = <&tdmout_b>, <&tdmout_c>, <&tdmin_a>, <&tdmin_b>, <&tdmin_c>, <&tdmin_lb>, - <&dioo2133>; + <&dio2133>; audio-routing = "TDMOUT_B IN 0", "FRDDR_A OUT 1", "TDMOUT_B IN 1", "FRDDR_B OUT 1", "TDMOUT_B IN 2", "FRDDR_C OUT 1", diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 6b457b2c30a4bc23ff9143a96845c7a2b4d6c330..aa14ea017a6130f20062837e7b22fb9084e1278c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -49,6 +49,12 @@ no-map; }; + /* 32 MiB reserved for ARM Trusted Firmware (BL32) */ + secmon_reserved_bl32: secmon@5300000 { + reg = <0x0 0x05300000 0x0 0x2000000>; + no-map; + }; + linux,cma { compatible = "shared-dma-pool"; reusable; diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts index 212c6aa5a3b86a59154146e19df813feb11e07fa..5751c48620edf426b17e70fb4f16100a692321e1 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-bananapi-m5.dts @@ -123,7 +123,7 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; - enable-gpio = <&gpio GPIOE_2 GPIO_ACTIVE_HIGH>; + 
enable-gpio = <&gpio_ao GPIOE_2 GPIO_ACTIVE_HIGH>; enable-active-high; regulator-always-on; diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi index 0bd1e98a0eef6083e92fd74c33298533ce02b61a..ddb1b345397fd12e778cb3afc722dc1c225c46be 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-odroid.dtsi @@ -48,7 +48,7 @@ regulator-max-microvolt = <3300000>; vin-supply = <&vcc_5v>; - enable-gpio = <&gpio GPIOE_2 GPIO_ACTIVE_HIGH>; + enable-gpio = <&gpio_ao GPIOE_2 GPIO_OPEN_DRAIN>; enable-active-high; regulator-always-on; diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts index 427475846fc702feb5d9d1d7e5e0416c249df8a6..a5d79f2f7c1960fbd13d2debbe27b919f5336a3b 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts @@ -203,14 +203,6 @@ regulator-always-on; }; - reserved-memory { - /* TEE Reserved Memory */ - bl32_reserved: bl32@5000000 { - reg = <0x0 0x05300000 0x0 0x2000000>; - no-map; - }; - }; - sdio_pwrseq: sdio-pwrseq { compatible = "mmc-pwrseq-simple"; reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts index d74e738e40706c811c585a19a36ae989018ba278..c03f4e1833893b84baae2d810c41b44ff98ea270 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-kontron-sl28.dts @@ -157,6 +157,10 @@ }; }; +&ftm_alarm0 { + status = "okay"; +}; + &gpio1 { gpio-line-names = "", "", "", "", "", "", "", "", diff --git a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi index f3e3418f7edc0ec270a5ff3f7c42a702534b7a0b..2d4a472af6a9b3cc3b52bcd7dada7873608fa278 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq-librem5.dtsi @@ -1115,8 +1115,8 @@ status = "okay"; ports { - port@1 { - reg = <1>; + port@0 { + reg = <0>; mipi1_sensor_ep: endpoint { remote-endpoint = <&camera1_ep>; diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 2df2510d011841d4eecaae2de6a67bebc3765a1b..e92ebb6147e6d72ca27e5cb34504d3d219a58f49 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -554,7 +554,7 @@ assigned-clock-rates = <0>, <0>, <0>, <594000000>; status = "disabled"; - port@0 { + port { lcdif_mipi_dsi: endpoint { remote-endpoint = <&mipi_dsi_lcdif_in>; }; @@ -1151,8 +1151,8 @@ #address-cells = <1>; #size-cells = <0>; - port@0 { - reg = <0>; + port@1 { + reg = <1>; csi1_mipi_ep: endpoint { remote-endpoint = <&csi1_ep>; @@ -1203,8 +1203,8 @@ #address-cells = <1>; #size-cells = <0>; - port@0 { - reg = <0>; + port@1 { + reg = <1>; csi2_mipi_ep: endpoint { remote-endpoint = <&csi2_ep>; diff --git a/arch/arm64/boot/dts/freescale/mba8mx.dtsi b/arch/arm64/boot/dts/freescale/mba8mx.dtsi index f27e3c8de916ab124b02b0c7d00e47ff4e34b6ad..ce6d5bdba0a893444f35cedf83844615beddbfdd 100644 --- a/arch/arm64/boot/dts/freescale/mba8mx.dtsi +++ b/arch/arm64/boot/dts/freescale/mba8mx.dtsi @@ -91,7 +91,7 @@ sound { compatible = "fsl,imx-audio-tlv320aic32x4"; - model = "tqm-tlv320aic32"; + model = "imx-audio-tlv320aic32x4"; ssi-controller = <&sai3>; audio-codec = <&tlv320aic3x04>; }; diff --git 
a/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts b/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts index a5a24f9f46c5841fbf7c199b09e1466f73299db1..b210cc07c539539907ea2e538f41a6ac5503d8bf 100644 --- a/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts +++ b/arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dts @@ -15,8 +15,18 @@ model = "Texas Instruments J721S2 EVM"; chosen { - stdout-path = "serial10:115200n8"; - bootargs = "console=ttyS10,115200n8 earlycon=ns16550a,mmio32,2880000"; + stdout-path = "serial2:115200n8"; + bootargs = "console=ttyS2,115200n8 earlycon=ns16550a,mmio32,2880000"; + }; + + aliases { + serial1 = &mcu_uart0; + serial2 = &main_uart8; + mmc0 = &main_sdhci0; + mmc1 = &main_sdhci1; + can0 = &main_mcan16; + can1 = &mcu_mcan0; + can2 = &mcu_mcan1; }; evm_12v0: fixedregulator-evm12v0 { diff --git a/arch/arm64/boot/dts/ti/k3-j721s2.dtsi b/arch/arm64/boot/dts/ti/k3-j721s2.dtsi index 80d3cae03e8852dd6e6bbec2f22b1d999f91c06f..fe5234c40f6ce891b82243afaf3cccd8f077a29f 100644 --- a/arch/arm64/boot/dts/ti/k3-j721s2.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721s2.dtsi @@ -21,28 +21,6 @@ #address-cells = <2>; #size-cells = <2>; - aliases { - serial0 = &wkup_uart0; - serial1 = &mcu_uart0; - serial2 = &main_uart0; - serial3 = &main_uart1; - serial4 = &main_uart2; - serial5 = &main_uart3; - serial6 = &main_uart4; - serial7 = &main_uart5; - serial8 = &main_uart6; - serial9 = &main_uart7; - serial10 = &main_uart8; - serial11 = &main_uart9; - mmc0 = &main_sdhci0; - mmc1 = &main_sdhci1; - can0 = &main_mcan16; - can1 = &mcu_mcan0; - can2 = &mcu_mcan1; - can3 = &main_mcan3; - can4 = &main_mcan5; - }; - chosen { }; cpus { diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index d955ade5df7c0b401eec0d9bb3883be4e63f2f9f..5d460f6b7675e9fa8f68822c7adf1798daba98a0 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -249,7 +249,7 @@ __lse__cmpxchg_case_##name##sz(volatile void *ptr, \ " mov %" #w "[tmp], %" #w "[old]\n" \ " cas" #mb #sfx "\t%" #w "[tmp], %" #w "[new], %[v]\n" \ " mov %" #w "[ret], %" #w "[tmp]" \ - : [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr), \ + : [ret] "+r" (x0), [v] "+Q" (*(u##sz *)ptr), \ [tmp] "=&r" (tmp) \ : [old] "r" (x1), [new] "r" (x2) \ : cl); \ diff --git a/arch/arm64/include/asm/bitops.h b/arch/arm64/include/asm/bitops.h index 81a3e519b07db7cca88107379f83790290884f29..9b3c787132d220e08733308503a33aa6d00be2de 100644 --- a/arch/arm64/include/asm/bitops.h +++ b/arch/arm64/include/asm/bitops.h @@ -18,7 +18,6 @@ #include #include -#include #include #include diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index f9bef42c1411af12a10c79c7087994a77a752542..497acf134d99231bc6d307eb3ab287e7319f8169 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -243,7 +243,7 @@ static inline void __cmpwait_case_##sz(volatile void *ptr, \ " cbnz %" #w "[tmp], 1f\n" \ " wfe\n" \ "1:" \ - : [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr) \ + : [tmp] "=&r" (tmp), [v] "+Q" (*(u##sz *)ptr) \ : [val] "r" (val)); \ } diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 19b8441aa8f26537adbbd22256b953bbf1fab02f..999b9149f85681e014d7fa6ab539edcfc621cdcb 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -73,7 +73,9 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define 
ARM_CPU_PART_CORTEX_A510 0xD46 #define ARM_CPU_PART_CORTEX_A710 0xD47 +#define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define APM_CPU_PART_POTENZA 0x000 @@ -115,7 +117,9 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) #define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) +#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9e1c1aef9ebd64ff4a83c660172e6d94d1c58cc5..b217941713a8d652a710795af632e2ed8ab5576d 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -347,6 +347,7 @@ static const struct midr_range trbe_overwrite_fill_mode_cpus[] = { #endif #ifdef CONFIG_ARM64_ERRATUM_2119858 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_RANGE(MIDR_CORTEX_X2, 0, 0, 2, 0), #endif {}, }; @@ -371,6 +372,7 @@ static struct midr_range trbe_write_out_of_range_cpus[] = { #endif #ifdef CONFIG_ARM64_ERRATUM_2224489 MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_RANGE(MIDR_CORTEX_X2, 0, 0, 2, 0), #endif {}, }; @@ -597,6 +599,41 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus), }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_2077057 + { + .desc = "ARM erratum 2077057", + .capability = ARM64_WORKAROUND_2077057, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2), + }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_2064142 + { + .desc = "ARM erratum 2064142", + .capability = ARM64_WORKAROUND_2064142, + + /* Cortex-A510 r0p0 - r0p2 */ + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2) + }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_2038923 + { + .desc = "ARM erratum 2038923", + .capability = ARM64_WORKAROUND_2038923, + + /* Cortex-A510 r0p0 - r0p2 */ + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2) + }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_1902691 + { + .desc = "ARM erratum 1902691", + .capability = ARM64_WORKAROUND_1902691, + + /* Cortex-A510 r0p0 - r0p1 */ + ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 1) + }, #endif { } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index a46ab3b1c4d5f9e949dc3692bdebb5e470161fd0..e5f23dab1c8df8acc56a64d982cb065984167ecb 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1645,6 +1645,9 @@ static bool cpu_has_broken_dbm(void) MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), /* Kryo4xx Silver (rdpe => r1p0) */ MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2051678 + MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2), #endif {}, }; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 0fb58fed54cb2b2cac863cf945e02a2047b7505e..e4103e085681143e5f285c91d428f152d7013a6d 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -33,8 +33,8 @@ */ -static void start_backtrace(struct stackframe *frame, unsigned 
long fp, - unsigned long pc) +static notrace void start_backtrace(struct stackframe *frame, unsigned long fp, + unsigned long pc) { frame->fp = fp; frame->pc = pc; @@ -55,6 +55,7 @@ static void start_backtrace(struct stackframe *frame, unsigned long fp, frame->prev_fp = 0; frame->prev_type = STACK_TYPE_UNKNOWN; } +NOKPROBE_SYMBOL(start_backtrace); /* * Unwind from one frame record (A) to the next frame record (B). */ diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 60813497a38154db5dae5c084051228d8ae14deb..172452f79e462ed6ec6b462a19a80f0db4c1e857 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -29,8 +29,11 @@ ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \ ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO +# -Wmissing-prototypes and -Wmissing-declarations are removed from +# the CFLAGS of vgettimeofday.c to make it possible to build the +# kernel with CONFIG_WERROR enabled. CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) \ - $(CC_FLAGS_LTO) + $(CC_FLAGS_LTO) -Wmissing-prototypes -Wmissing-declarations KASAN_SANITIZE := n KCSAN_SANITIZE := n UBSAN_SANITIZE := n diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a4a0063df456ca0f963f62ece504e9991edcf5da..ecc5958e27fe2b3fc69b9b1121a626495cb13c46 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -797,6 +797,24 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret) xfer_to_guest_mode_work_pending(); } +/* + * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while + * the vCPU is running. + * + * This must be noinstr as instrumentation may make use of RCU, and this is not + * safe during the EQS. + */ +static int noinstr kvm_arm_vcpu_enter_exit(struct kvm_vcpu *vcpu) +{ + int ret; + + guest_state_enter_irqoff(); + ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu); + guest_state_exit_irqoff(); + + return ret; +} + /** * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code * @vcpu: The VCPU pointer @@ -881,9 +899,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) * Enter the guest */ trace_kvm_entry(*vcpu_pc(vcpu)); - guest_enter_irqoff(); + guest_timing_enter_irqoff(); - ret = kvm_call_hyp_ret(__kvm_vcpu_run, vcpu); + ret = kvm_arm_vcpu_enter_exit(vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; @@ -918,26 +936,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_arch_vcpu_ctxsync_fp(vcpu); /* - * We may have taken a host interrupt in HYP mode (ie - * while executing the guest). This interrupt is still - * pending, as we haven't serviced it yet! + * We must ensure that any pending interrupts are taken before + * we exit guest timing so that timer ticks are accounted as + * guest time. Transiently unmask interrupts so that any + * pending interrupts are taken. * - * We're now back in SVC mode, with interrupts - * disabled. Enabling the interrupts now will have - * the effect of taking the interrupt again, in SVC - * mode this time. + * Per ARM DDI 0487G.b section D1.13.4, an ISB (or other + * context synchronization event) is necessary to ensure that + * pending interrupts are taken.
*/ local_irq_enable(); + isb(); + local_irq_disable(); + + guest_timing_exit_irqoff(); + + local_irq_enable(); - /* - * We do local_irq_enable() before calling guest_exit() so - * that if a timer interrupt hits while running the guest we - * account that tick as being spent in the guest. We enable - * preemption after calling guest_exit() so that if we get - * preempted we make sure ticks after that is not counted as - * guest time. - */ - guest_exit(); trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); /* Exit types that need handling before we can be preempted */ diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index fd2dd26caf910287c005f109a3640e7980bfe442..e3140abd2e2eb3758e756457f0790361ba208a49 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -228,6 +228,14 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) { struct kvm_run *run = vcpu->run; + if (ARM_SERROR_PENDING(exception_index)) { + /* + * The SError is handled by handle_exit_early(). If the guest + * survives it will re-execute the original instruction. + */ + return 1; + } + exception_index = ARM_EXCEPTION_CODE(exception_index); switch (exception_index) { diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 0418399e0a2016738c632ef286ec985febc24cdb..c5d0097154020a11410b821e3bcfaa2013ebd9a0 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -38,7 +38,10 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val) { - write_sysreg_el1(val, SYS_SPSR); + if (has_vhe()) + write_sysreg_el1(val, SYS_SPSR); + else + __vcpu_sys_reg(vcpu, SPSR_EL1) = val; } static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 58e14f8ead23c113b3177d1e1b5b499f5c015000..701cfb964905df6384168380505408b0fc4be0a1 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -402,6 +402,24 @@ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code) return false; } +static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + /* + * Check for the conditions of Cortex-A510's #2077057. When these occur + * SPSR_EL2 can't be trusted, but isn't needed either as it is + * unchanged from the value in vcpu_gp_regs(vcpu)->pstate. + * Are we single-stepping the guest, and took a PAC exception from the + * active-not-pending state? + */ + if (cpus_have_final_cap(ARM64_WORKAROUND_2077057) && + vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && + *vcpu_cpsr(vcpu) & DBG_SPSR_SS && + ESR_ELx_EC(read_sysreg_el2(SYS_ESR)) == ESR_ELx_EC_PAC) + write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR); + + vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR); +} + /* * Return true when we were able to fixup the guest exit and should return to * the guest, false when we should restore the host state and return to the @@ -413,7 +431,7 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) * Save PSTATE early so that we can evaluate the vcpu mode * early on. 
*/ - vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR); + synchronize_vcpu_pstate(vcpu, exit_code); /* * Check whether we want to repaint the state one way or @@ -424,7 +442,8 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); - if (ARM_SERROR_PENDING(*exit_code)) { + if (ARM_SERROR_PENDING(*exit_code) && + ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) { u8 esr_ec = kvm_vcpu_trap_get_class(vcpu); /* diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 844a6f003fd544ebd349e0b9c08463b7e42aaf6c..2cb3867eb7c2bad97ed2e9eb1695e4dcd9e233bb 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -983,13 +983,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, */ stage2_put_pte(ptep, mmu, addr, level, mm_ops); - if (need_flush) { - kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops); - - dcache_clean_inval_poc((unsigned long)pte_follow, - (unsigned long)pte_follow + - kvm_granule_size(level)); - } + if (need_flush && mm_ops->dcache_clean_inval_poc) + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), + kvm_granule_size(level)); if (childp) mm_ops->put_page(childp); @@ -1151,15 +1147,13 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct kvm_pgtable *pgt = arg; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; kvm_pte_t pte = *ptep; - kvm_pte_t *pte_follow; if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte)) return 0; - pte_follow = kvm_pte_follow(pte, mm_ops); - dcache_clean_inval_poc((unsigned long)pte_follow, - (unsigned long)pte_follow + - kvm_granule_size(level)); + if (mm_ops->dcache_clean_inval_poc) + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), + kvm_granule_size(level)); return 0; } diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 20db2f281cf23b6101ca9bce46511fb797a1f025..4fb419f7b8b61a93171860b77e8fd45253affb5a 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -983,6 +983,9 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT; /* IDbits */ val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT; + /* SEIS */ + if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) + val |= BIT(ICC_CTLR_EL1_SEIS_SHIFT); /* A3V */ val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT; /* EOImode */ diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index a33d4366b326ba1ab7be7d21983f2b2110d953d9..b549af8b1dc2c9401ae8d8dbe3cd6fb7dc6a6a05 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -609,6 +609,18 @@ static int __init early_gicv4_enable(char *buf) } early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); +static const struct midr_range broken_seis[] = { + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM), + {}, +}; + +static bool vgic_v3_broken_seis(void) +{ + return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) && + is_midr_in_range_list(read_cpuid_id(), broken_seis)); +} + /** * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller * @info: pointer to the GIC description @@ -676,9 +688,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info) group1_trap = true; } - if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) { - kvm_info("GICv3 
with locally generated SEI\n"); + if (vgic_v3_broken_seis()) { + kvm_info("GICv3 with broken locally generated SEI\n"); + kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK; group0_trap = true; group1_trap = true; if (ich_vtr_el2 & ICH_VTR_TDS_MASK) diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c index c0181e60cc98e86e9695e6135466d1864d13af7b..4894553096951f326944207737d28fdddb4b1e4c 100644 --- a/arch/arm64/mm/extable.c +++ b/arch/arm64/mm/extable.c @@ -40,8 +40,8 @@ static bool ex_handler_load_unaligned_zeropad(const struct exception_table_entry *ex, struct pt_regs *regs) { - int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->type); - int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->type); + int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data); + int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data); unsigned long data, addr, offset; addr = pt_regs_read_reg(regs, reg_addr); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index a8834434af99ae0bdbaa4b5f36d522b4370702bf..db63cc885771a527b37f91e1eff0bf1f9432dbee 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -172,7 +172,7 @@ int pfn_is_map_memory(unsigned long pfn) } EXPORT_SYMBOL(pfn_is_map_memory); -static phys_addr_t memory_limit = PHYS_ADDR_MAX; +static phys_addr_t memory_limit __ro_after_init = PHYS_ADDR_MAX; /* * Limit the memory size that was specified via FDT. diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 870c39537dd09c0ce983c4c2af5844e43e441c81..9c65b1e25a96506a874e578ece7bdce6ec52c309 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -55,6 +55,10 @@ WORKAROUND_1418040 WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 +WORKAROUND_1902691 +WORKAROUND_2038923 +WORKAROUND_2064142 +WORKAROUND_2077057 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE WORKAROUND_TRBE_WRITE_OUT_OF_RANGE diff --git a/arch/csky/include/asm/bitops.h b/arch/csky/include/asm/bitops.h index 02b72a0007671308878bae4de47eda256b442324..72e1b2aa29a07d1c913e6951d25784b69008e307 100644 --- a/arch/csky/include/asm/bitops.h +++ b/arch/csky/include/asm/bitops.h @@ -59,7 +59,6 @@ static __always_inline unsigned long __fls(unsigned long x) #include #include -#include #ifndef _LINUX_BITOPS_H #error only can be included directly diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h index c867a80cab5b583b518ac67c94eb3790523e822b..4489e3d6edd3d91a8e595acfcede0a29e04e9189 100644 --- a/arch/h8300/include/asm/bitops.h +++ b/arch/h8300/include/asm/bitops.h @@ -168,7 +168,6 @@ static inline unsigned long __ffs(unsigned long word) return result; } -#include #include #include #include diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h index 71429f756af0f45a846598529055b34b1d70f269..75d6ba3643b8d14d9314d1b45ba94df7b57d8950 100644 --- a/arch/hexagon/include/asm/bitops.h +++ b/arch/hexagon/include/asm/bitops.h @@ -271,7 +271,6 @@ static inline unsigned long __fls(unsigned long word) } #include -#include #include #include diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 703952819e10e01bef5abe33bd6c169e68af83b3..a7e01573abd831c89f065a1ad522ecddab9a9af0 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -318,7 +318,7 @@ config ARCH_PROC_KCORE_TEXT depends on PROC_KCORE config IA64_MCA_RECOVERY - tristate "MCA recovery from errors other than TLB." + bool "MCA recovery from errors other than TLB." 
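(Stepping back to the arch/arm64/mm/extable.c hunk above: the register indices for the load_unaligned_zeropad() fixup are packed into the exception table entry's data field, so decoding them from ex->type read the wrong word. A rough sketch of the intended encode/decode pairing follows; the field widths are assumptions here, not copied from asm-extable.h.)

/* Illustrative only -- see asm-extable.h for the real definitions. */
#define EX_DATA_REG_DATA	GENMASK(4, 0)	/* destination register */
#define EX_DATA_REG_ADDR	GENMASK(9, 5)	/* address register */

/* The assembler side packs both indices into ->data, so the handler
 * must unpack them from ->data as well: */
int reg_data = FIELD_GET(EX_DATA_REG_DATA, ex->data);
int reg_addr = FIELD_GET(EX_DATA_REG_ADDR, ex->data);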
config IA64_PALINFO tristate "/proc/pal support" diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h index 2f24ee6459d20ff49e07ebdb1aa5c137c85e1953..577be93c0818cfe0ee1ba353b8f5ef1957a7caa4 100644 --- a/arch/ia64/include/asm/bitops.h +++ b/arch/ia64/include/asm/bitops.h @@ -441,8 +441,6 @@ static __inline__ unsigned long __arch_hweight64(unsigned long x) #endif /* __KERNEL__ */ -#include - #ifdef __KERNEL__ #include diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index a25ab9b37953e75d6d27ff277b22f6b2e3e136ca..bd3ba276e69c3ca943e2f95aa2794b1b41f1363a 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -282,7 +282,7 @@ salinfo_event_open(struct inode *inode, struct file *file) static ssize_t salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { - struct salinfo_data *data = PDE_DATA(file_inode(file)); + struct salinfo_data *data = pde_data(file_inode(file)); char cmd[32]; size_t size; int i, n, cpu = -1; @@ -340,7 +340,7 @@ static const struct proc_ops salinfo_event_proc_ops = { static int salinfo_log_open(struct inode *inode, struct file *file) { - struct salinfo_data *data = PDE_DATA(inode); + struct salinfo_data *data = pde_data(inode); if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -365,7 +365,7 @@ salinfo_log_open(struct inode *inode, struct file *file) static int salinfo_log_release(struct inode *inode, struct file *file) { - struct salinfo_data *data = PDE_DATA(inode); + struct salinfo_data *data = pde_data(inode); if (data->state == STATE_NO_DATA) { vfree(data->log_buffer); @@ -433,7 +433,7 @@ retry: static ssize_t salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { - struct salinfo_data *data = PDE_DATA(file_inode(file)); + struct salinfo_data *data = pde_data(file_inode(file)); u8 *buf; u64 bufsize; @@ -494,7 +494,7 @@ salinfo_log_clear(struct salinfo_data *data, int cpu) static ssize_t salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - struct salinfo_data *data = PDE_DATA(file_inode(file)); + struct salinfo_data *data = pde_data(file_inode(file)); char cmd[32]; size_t size; u32 offset; diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c index acb55a41260dd0ab9ade073d28732dee0ee822f6..2bcdd7d3a1adad58e09e0fceb6c07c9aa06fb6d9 100644 --- a/arch/ia64/pci/fixup.c +++ b/arch/ia64/pci/fixup.c @@ -76,5 +76,5 @@ static void pci_fixup_video(struct pci_dev *pdev) } } } -DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); +DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index a4b6c7108465796b8930b948705207e2973aa24c..bc9952f8be6675d7e18b1eb6bf3c669f069f8bfb 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -45,7 +45,6 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_AOUT=m CONFIG_BINFMT_MISC=m # CONFIG_COMPACTION is not set -CONFIG_CLEANCACHE=y CONFIG_ZPOOL=m CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 2db721965520252bbd1ecbbd282112f3dfef3ffc..a77269c6e5bacfc10258ca0201b20db46d7ef06f 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -41,7 +41,6 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_AOUT=m CONFIG_BINFMT_MISC=m # CONFIG_COMPACTION is not set -CONFIG_CLEANCACHE=y CONFIG_ZPOOL=m 
CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index c266a704eecdc1556a98ba9cec3639696d4b770e..7a74efa6b9a1a0d4126e4667194f3ebf8a864626 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -48,7 +48,6 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_AOUT=m CONFIG_BINFMT_MISC=m # CONFIG_COMPACTION is not set -CONFIG_CLEANCACHE=y CONFIG_ZPOOL=m CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index f644f08dd6ed886a5b3ed92221dab5b3f81d385f..a5323bf2eb3336a89b909b0c3f57446f1f50ba71 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -38,7 +38,6 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_AOUT=m CONFIG_BINFMT_MISC=m # CONFIG_COMPACTION is not set -CONFIG_CLEANCACHE=y CONFIG_ZPOOL=m CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index e4924650b6875441fdf2ae81257a3e01230624b0..5e80aa0869d541092e75cc913a5da846599f3c95 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -40,7 +40,6 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_AOUT=m CONFIG_BINFMT_MISC=m # CONFIG_COMPACTION is not set -CONFIG_CLEANCACHE=y CONFIG_ZPOOL=m CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 24113871ea7698880848357f58b1e787ad2b3dcd..e84326a3f62db53c0d889b114cf044dbe46d1b58 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -39,7 +39,6 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_AOUT=m CONFIG_BINFMT_MISC=m # CONFIG_COMPACTION is not set -CONFIG_CLEANCACHE=y CONFIG_ZPOOL=m CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 6a7e4be70eea9a3f717d2ebf641ba68fae6ee26d..337552f433390aaee3df02c6a2d717b89eecd0ac 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -59,7 +59,6 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_AOUT=m CONFIG_BINFMT_MISC=m # CONFIG_COMPACTION is not set -CONFIG_CLEANCACHE=y CONFIG_ZPOOL=m CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 1d223247aff08feb2170ec7de2cb4a68c63b6bd4..7b688f7d272a2980e50879628d4445046d6152e1 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -37,7 +37,6 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_AOUT=m CONFIG_BINFMT_MISC=m # CONFIG_COMPACTION is not set -CONFIG_CLEANCACHE=y CONFIG_ZPOOL=m CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 961f789f96c9470a7b4f99de5ad8ebaf2db202f4..7c2cb31d63dd8ebba7b303c3087f7898b94137bb 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -38,7 +38,6 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_AOUT=m CONFIG_BINFMT_MISC=m # CONFIG_COMPACTION is not set -CONFIG_CLEANCACHE=y CONFIG_ZPOOL=m CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index ff4b5e469390b577f159d99fa7db31c5e610b6f0..ca43897af26de2c8357e302e3ea17f2015353de7 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -39,7 +39,6 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_AOUT=m CONFIG_BINFMT_MISC=m # CONFIG_COMPACTION is not set -CONFIG_CLEANCACHE=y CONFIG_ZPOOL=m CONFIG_NET=y CONFIG_PACKET=y diff --git 
a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 5f228621d0ccbb0f3726698f403f20dc3e6ad56f..e3d515f37144aa33e1d1c59e0f0d8892bbbbf778 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -35,7 +35,6 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_AOUT=m CONFIG_BINFMT_MISC=m # CONFIG_COMPACTION is not set -CONFIG_CLEANCACHE=y CONFIG_ZPOOL=m CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index a600cb9e68c2567aa66f777ab5df2c09d73886f6..d601606c969b894e72b70bacfa22763c9e1d0fcc 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -35,7 +35,6 @@ CONFIG_IOSCHED_BFQ=m CONFIG_BINFMT_AOUT=m CONFIG_BINFMT_MISC=m # CONFIG_COMPACTION is not set -CONFIG_CLEANCACHE=y CONFIG_ZPOOL=m CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h index 7b93e1fd8ffa902f3cdbba84afe87297fc1accd1..51283db53667f416ccae5356bb94609edbf301dc 100644 --- a/arch/m68k/include/asm/bitops.h +++ b/arch/m68k/include/asm/bitops.h @@ -529,6 +529,4 @@ static inline int __fls(int x) #include #endif /* __KERNEL__ */ -#include - #endif /* _M68K_BITOPS_H */ diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index edf6c1577449f20064fa535ba2f13074a7d4a207..058446f01487c34489fae8fab59c2af40ef61ab7 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -32,7 +32,6 @@ config MIPS select GENERIC_ATOMIC64 if !64BIT select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE - select GENERIC_FIND_FIRST_BIT select GENERIC_GETTIMEOFDAY select GENERIC_IOMAP select GENERIC_IRQ_PROBE diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index 3e336b3dbb1097769019d55badebe54e3410c6fd..ab6e3dc0bc1d00a657d7182705f2b55821fc4075 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -83,6 +83,8 @@ label = "HDMI OUT"; type = "a"; + ddc-en-gpios = <&gpa 25 GPIO_ACTIVE_HIGH>; + port { hdmi_con: endpoint { remote-endpoint = <&dw_hdmi_out>; @@ -114,17 +116,6 @@ gpio = <&gpf 14 GPIO_ACTIVE_LOW>; enable-active-high; }; - - hdmi_power: fixedregulator@3 { - compatible = "regulator-fixed"; - - regulator-name = "hdmi_power"; - regulator-min-microvolt = <5000000>; - regulator-max-microvolt = <5000000>; - - gpio = <&gpa 25 0>; - enable-active-high; - }; }; &ext { @@ -576,8 +567,6 @@ pinctrl-names = "default"; pinctrl-0 = <&pins_hdmi_ddc>; - hdmi-5v-supply = <&hdmi_power>; - ports { #address-cells = <1>; #size-cells = <0>; diff --git a/arch/mips/cavium-octeon/octeon-memcpy.S b/arch/mips/cavium-octeon/octeon-memcpy.S index 0a515cde1c183723db44c1a42db3987a9014a6d6..25860fba6218d888559b9a683c79a9fb975616a5 100644 --- a/arch/mips/cavium-octeon/octeon-memcpy.S +++ b/arch/mips/cavium-octeon/octeon-memcpy.S @@ -74,7 +74,7 @@ #define EXC(inst_reg,addr,handler) \ 9: inst_reg, addr; \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous /* diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h index 6ffdd4b5e1d0606a1a451f7ec0f8a284834230c6..336ac9b6523502e197e05ccfb3772d9c21733194 100644 --- a/arch/mips/include/asm/asm.h +++ b/arch/mips/include/asm/asm.h @@ -285,7 +285,7 @@ symbol = value #define PTR_SCALESHIFT 2 -#define PTR .word +#define PTR_WD .word #define PTRSIZE 4 #define PTRLOG 2 #endif @@ -310,7 +310,7 @@ symbol = value #define PTR_SCALESHIFT 3 -#define PTR .dword +#define PTR_WD .dword #define PTRSIZE 8 #define PTRLOG 3 #endif diff --git 
a/arch/mips/include/asm/bitops.h b/arch/mips/include/asm/bitops.h index 3812082b82956508d4a09e39ea48d0b24ce725f6..b4bf754f7db3f7fda634f8d659c8d0d120d05479 100644 --- a/arch/mips/include/asm/bitops.h +++ b/arch/mips/include/asm/bitops.h @@ -444,7 +444,6 @@ static inline int ffs(int word) } #include -#include #ifdef __KERNEL__ diff --git a/arch/mips/include/asm/ftrace.h b/arch/mips/include/asm/ftrace.h index b463f2aa5a613caf29b13adb43b7a8097fb98bf2..db497a8167da299d64005b5789136d82ce6c8106 100644 --- a/arch/mips/include/asm/ftrace.h +++ b/arch/mips/include/asm/ftrace.h @@ -32,7 +32,7 @@ do { \ ".previous\n" \ \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR) "\t1b, 3b\n\t" \ + STR(PTR_WD) "\t1b, 3b\n\t" \ ".previous\n" \ \ : [tmp_dst] "=&r" (dst), [tmp_err] "=r" (error)\ @@ -54,7 +54,7 @@ do { \ ".previous\n" \ \ ".section\t__ex_table,\"a\"\n\t"\ - STR(PTR) "\t1b, 3b\n\t" \ + STR(PTR_WD) "\t1b, 3b\n\t" \ ".previous\n" \ \ : [tmp_err] "=r" (error) \ diff --git a/arch/mips/include/asm/r4kcache.h b/arch/mips/include/asm/r4kcache.h index af3788589ee6d0e7868cf7ed668fb9e3ad5c772f..431a1c9d53fc7124d6f898bedcd0d6a3b1f760d3 100644 --- a/arch/mips/include/asm/r4kcache.h +++ b/arch/mips/include/asm/r4kcache.h @@ -119,7 +119,7 @@ static inline void flush_scache_line(unsigned long addr) " j 2b \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ - " "STR(PTR)" 1b, 3b \n" \ + " "STR(PTR_WD)" 1b, 3b \n" \ " .previous" \ : "+r" (__err) \ : "i" (op), "r" (addr), "i" (-EFAULT)); \ @@ -142,7 +142,7 @@ static inline void flush_scache_line(unsigned long addr) " j 2b \n" \ " .previous \n" \ " .section __ex_table,\"a\" \n" \ - " "STR(PTR)" 1b, 3b \n" \ + " "STR(PTR_WD)" 1b, 3b \n" \ " .previous" \ : "+r" (__err) \ : "i" (op), "r" (addr), "i" (-EFAULT)); \ diff --git a/arch/mips/include/asm/unaligned-emul.h b/arch/mips/include/asm/unaligned-emul.h index 2022b18944b979ea043e35656f806b0f8403f4df..9af0f4d3d288c47b77e4270b9476f9a5fc17ceb9 100644 --- a/arch/mips/include/asm/unaligned-emul.h +++ b/arch/mips/include/asm/unaligned-emul.h @@ -20,8 +20,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -41,8 +41,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -74,10 +74,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -102,8 +102,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -125,8 +125,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : 
"r" (addr), "i" (-EFAULT)); \ @@ -145,8 +145,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -178,10 +178,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -223,14 +223,14 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t5b, 11b\n\t" \ + STR(PTR_WD)"\t6b, 11b\n\t" \ + STR(PTR_WD)"\t7b, 11b\n\t" \ + STR(PTR_WD)"\t8b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -255,8 +255,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT));\ @@ -276,8 +276,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ @@ -296,8 +296,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ @@ -325,10 +325,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT) \ @@ -365,14 +365,14 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t5b, 11b\n\t" \ + STR(PTR_WD)"\t6b, 11b\n\t" \ + STR(PTR_WD)"\t7b, 11b\n\t" \ + STR(PTR_WD)"\t8b, 11b\n\t" \ ".previous" \ : "=&r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT) \ @@ -398,8 +398,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -419,8 
+419,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -452,10 +452,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -481,8 +481,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -504,8 +504,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -524,8 +524,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -557,10 +557,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -602,14 +602,14 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t5b, 11b\n\t" \ + STR(PTR_WD)"\t6b, 11b\n\t" \ + STR(PTR_WD)"\t7b, 11b\n\t" \ + STR(PTR_WD)"\t8b, 11b\n\t" \ ".previous" \ : "=&r" (value), "=r" (res) \ : "r" (addr), "i" (-EFAULT)); \ @@ -632,8 +632,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT));\ @@ -653,8 +653,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ @@ -673,8 +673,8 @@ do { \ "j\t3b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ + STR(PTR_WD)"\t1b, 4b\n\t" \ + STR(PTR_WD)"\t2b, 4b\n\t" \ ".previous" \ : "=r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT)); \ @@ -703,10 +703,10 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 
11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ ".previous" \ : "=&r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT) \ @@ -743,14 +743,14 @@ do { \ "j\t10b\n\t" \ ".previous\n\t" \ ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ + STR(PTR_WD)"\t1b, 11b\n\t" \ + STR(PTR_WD)"\t2b, 11b\n\t" \ + STR(PTR_WD)"\t3b, 11b\n\t" \ + STR(PTR_WD)"\t4b, 11b\n\t" \ + STR(PTR_WD)"\t5b, 11b\n\t" \ + STR(PTR_WD)"\t6b, 11b\n\t" \ + STR(PTR_WD)"\t7b, 11b\n\t" \ + STR(PTR_WD)"\t8b, 11b\n\t" \ ".previous" \ : "=&r" (res) \ : "r" (value), "r" (addr), "i" (-EFAULT) \ diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c index a39ec755e4c24404c0ba76da6d89909fd123f5d4..750fe569862b634afc22a80bdc6907d97ab0bb07 100644 --- a/arch/mips/kernel/mips-r2-to-r6-emul.c +++ b/arch/mips/kernel/mips-r2-to-r6-emul.c @@ -1258,10 +1258,10 @@ fpu_emul: " j 10b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1333,10 +1333,10 @@ fpu_emul: " j 10b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1404,10 +1404,10 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1474,10 +1474,10 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1589,14 +1589,14 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" - STR(PTR) " 5b,8b\n" - STR(PTR) " 6b,8b\n" - STR(PTR) " 7b,8b\n" - STR(PTR) " 0b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" + STR(PTR_WD) " 5b,8b\n" + STR(PTR_WD) " 6b,8b\n" + STR(PTR_WD) " 7b,8b\n" + STR(PTR_WD) " 0b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1708,14 +1708,14 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" - STR(PTR) " 5b,8b\n" - STR(PTR) " 6b,8b\n" - STR(PTR) " 7b,8b\n" - STR(PTR) " 0b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" + STR(PTR_WD) " 5b,8b\n" + STR(PTR_WD) " 6b,8b\n" + STR(PTR_WD) " 7b,8b\n" + STR(PTR_WD) " 0b,8b\n" " .previous\n" " .set pop\n" : 
"+&r"(rt), "=&r"(rs), @@ -1827,14 +1827,14 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" - STR(PTR) " 5b,8b\n" - STR(PTR) " 6b,8b\n" - STR(PTR) " 7b,8b\n" - STR(PTR) " 0b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" + STR(PTR_WD) " 5b,8b\n" + STR(PTR_WD) " 6b,8b\n" + STR(PTR_WD) " 7b,8b\n" + STR(PTR_WD) " 0b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -1945,14 +1945,14 @@ fpu_emul: " j 9b\n" " .previous\n" " .section __ex_table,\"a\"\n" - STR(PTR) " 1b,8b\n" - STR(PTR) " 2b,8b\n" - STR(PTR) " 3b,8b\n" - STR(PTR) " 4b,8b\n" - STR(PTR) " 5b,8b\n" - STR(PTR) " 6b,8b\n" - STR(PTR) " 7b,8b\n" - STR(PTR) " 0b,8b\n" + STR(PTR_WD) " 1b,8b\n" + STR(PTR_WD) " 2b,8b\n" + STR(PTR_WD) " 3b,8b\n" + STR(PTR_WD) " 4b,8b\n" + STR(PTR_WD) " 5b,8b\n" + STR(PTR_WD) " 6b,8b\n" + STR(PTR_WD) " 7b,8b\n" + STR(PTR_WD) " 0b,8b\n" " .previous\n" " .set pop\n" : "+&r"(rt), "=&r"(rs), @@ -2007,7 +2007,7 @@ fpu_emul: "j 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" - STR(PTR) " 1b,3b\n" + STR(PTR_WD) " 1b,3b\n" ".previous\n" : "=&r"(res), "+&r"(err) : "r"(vaddr), "i"(SIGSEGV) @@ -2065,7 +2065,7 @@ fpu_emul: "j 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" - STR(PTR) " 1b,3b\n" + STR(PTR_WD) " 1b,3b\n" ".previous\n" : "+&r"(res), "+&r"(err) : "r"(vaddr), "i"(SIGSEGV)); @@ -2126,7 +2126,7 @@ fpu_emul: "j 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" - STR(PTR) " 1b,3b\n" + STR(PTR_WD) " 1b,3b\n" ".previous\n" : "=&r"(res), "+&r"(err) : "r"(vaddr), "i"(SIGSEGV) @@ -2189,7 +2189,7 @@ fpu_emul: "j 2b\n" ".previous\n" ".section __ex_table,\"a\"\n" - STR(PTR) " 1b,3b\n" + STR(PTR_WD) " 1b,3b\n" ".previous\n" : "+&r"(res), "+&r"(err) : "r"(vaddr), "i"(SIGSEGV)); diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index cbf6db98cfb38ac3e43364eef2cb4bf0d6455b39..2748c55820c24666b8b36b103e88ca1deeaa8c01 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -23,14 +23,14 @@ #define EX(a,b) \ 9: a,##b; \ .section __ex_table,"a"; \ - PTR 9b,fault; \ + PTR_WD 9b,fault; \ .previous #define EX2(a,b) \ 9: a,##b; \ .section __ex_table,"a"; \ - PTR 9b,fault; \ - PTR 9b+4,fault; \ + PTR_WD 9b,fault; \ + PTR_WD 9b+4,fault; \ .previous .set mips1 diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S index b91e91106475614390561c6a92af035c98d9b9bc..2e687c60bc4f1938db72c8326bbb9455f05304bf 100644 --- a/arch/mips/kernel/r4k_fpu.S +++ b/arch/mips/kernel/r4k_fpu.S @@ -31,7 +31,7 @@ .ex\@: \insn \reg, \src .set pop .section __ex_table,"a" - PTR .ex\@, fault + PTR_WD .ex\@, fault .previous .endm diff --git a/arch/mips/kernel/relocate_kernel.S b/arch/mips/kernel/relocate_kernel.S index f3c908abdbb803bb2f90829dd2eba861442f71e7..cfde14b48fd8dbf9601d45c600fcddc5b09fb325 100644 --- a/arch/mips/kernel/relocate_kernel.S +++ b/arch/mips/kernel/relocate_kernel.S @@ -147,10 +147,10 @@ LEAF(kexec_smp_wait) kexec_args: EXPORT(kexec_args) -arg0: PTR 0x0 -arg1: PTR 0x0 -arg2: PTR 0x0 -arg3: PTR 0x0 +arg0: PTR_WD 0x0 +arg1: PTR_WD 0x0 +arg2: PTR_WD 0x0 +arg3: PTR_WD 0x0 .size kexec_args,PTRSIZE*4 #ifdef CONFIG_SMP @@ -161,10 +161,10 @@ arg3: PTR 0x0 */ secondary_kexec_args: EXPORT(secondary_kexec_args) -s_arg0: PTR 0x0 -s_arg1: PTR 0x0 -s_arg2: PTR 0x0 -s_arg3: PTR 0x0 +s_arg0: PTR_WD 0x0 +s_arg1: PTR_WD 0x0 +s_arg2: PTR_WD 0x0 +s_arg3: PTR_WD 0x0 .size secondary_kexec_args,PTRSIZE*4 kexec_flag: LONG 0x1 
@@ -173,17 +173,17 @@ kexec_flag: kexec_start_address: EXPORT(kexec_start_address) - PTR 0x0 + PTR_WD 0x0 .size kexec_start_address, PTRSIZE kexec_indirection_page: EXPORT(kexec_indirection_page) - PTR 0 + PTR_WD 0 .size kexec_indirection_page, PTRSIZE relocate_new_kernel_end: relocate_new_kernel_size: EXPORT(relocate_new_kernel_size) - PTR relocate_new_kernel_end - relocate_new_kernel + PTR_WD relocate_new_kernel_end - relocate_new_kernel .size relocate_new_kernel_size, PTRSIZE diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index b1b2e106f71184eddcea4b4cc031b857340bf755..9bfce5f75f6011f91e971bed8c6769c9574909dd 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -72,10 +72,10 @@ loads_done: .set pop .section __ex_table,"a" - PTR load_a4, bad_stack_a4 - PTR load_a5, bad_stack_a5 - PTR load_a6, bad_stack_a6 - PTR load_a7, bad_stack_a7 + PTR_WD load_a4, bad_stack_a4 + PTR_WD load_a5, bad_stack_a5 + PTR_WD load_a6, bad_stack_a6 + PTR_WD load_a7, bad_stack_a7 .previous lw t0, TI_FLAGS($28) # syscall tracing enabled? @@ -216,7 +216,7 @@ einval: li v0, -ENOSYS #endif /* CONFIG_MIPS_MT_FPAFF */ #define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native) -#define __SYSCALL(nr, entry) PTR entry +#define __SYSCALL(nr, entry) PTR_WD entry .align 2 .type sys_call_table, @object EXPORT(sys_call_table) diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index f650c55a17dc5c8421205aaf877fb1446527ce47..97456b2ca7dc32f13cac9a5843a3adea89735318 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -101,7 +101,7 @@ not_n32_scall: END(handle_sysn32) -#define __SYSCALL(nr, entry) PTR entry +#define __SYSCALL(nr, entry) PTR_WD entry .type sysn32_call_table, @object EXPORT(sysn32_call_table) #include diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S index 5d7bfc65e4d0b5158fa49ffdfc4ed46df4ca7364..5f6ed4b4c39937b1d13c4fe6329d2f8e66775fc4 100644 --- a/arch/mips/kernel/scall64-n64.S +++ b/arch/mips/kernel/scall64-n64.S @@ -109,7 +109,7 @@ illegal_syscall: j n64_syscall_exit END(handle_sys64) -#define __SYSCALL(nr, entry) PTR entry +#define __SYSCALL(nr, entry) PTR_WD entry .align 3 .type sys_call_table, @object EXPORT(sys_call_table) diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index cedc8bd8880468251d5b3abf6431f7c29e0a372e..d3c2616cba22690bffd63b4521dc0f0ea7216315 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -73,10 +73,10 @@ load_a7: lw a7, 28(t0) # argument #8 from usp loads_done: .section __ex_table,"a" - PTR load_a4, bad_stack_a4 - PTR load_a5, bad_stack_a5 - PTR load_a6, bad_stack_a6 - PTR load_a7, bad_stack_a7 + PTR_WD load_a4, bad_stack_a4 + PTR_WD load_a5, bad_stack_a5 + PTR_WD load_a6, bad_stack_a6 + PTR_WD load_a7, bad_stack_a7 .previous li t1, _TIF_WORK_SYSCALL_ENTRY @@ -214,7 +214,7 @@ einval: li v0, -ENOSYS END(sys32_syscall) #define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat) -#define __SYSCALL(nr, entry) PTR entry +#define __SYSCALL(nr, entry) PTR_WD entry .align 3 .type sys32_call_table,@object EXPORT(sys32_call_table) diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 5512cd586e6e89cf7897536f7dee78400b21c01e..ae93a607ddf7ed9acfb174beb7f8753bee63f1a6 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -122,8 +122,8 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new) " j 3b \n" " 
.previous \n" " .section __ex_table,\"a\" \n" - " "STR(PTR)" 1b, 4b \n" - " "STR(PTR)" 2b, 4b \n" + " "STR(PTR_WD)" 1b, 4b \n" + " "STR(PTR_WD)" 2b, 4b \n" " .previous \n" " .set pop \n" : [old] "=&r" (old), @@ -152,8 +152,8 @@ static inline int mips_atomic_set(unsigned long addr, unsigned long new) " j 3b \n" " .previous \n" " .section __ex_table,\"a\" \n" - " "STR(PTR)" 1b, 5b \n" - " "STR(PTR)" 2b, 5b \n" + " "STR(PTR_WD)" 1b, 5b \n" + " "STR(PTR_WD)" 2b, 5b \n" " .previous \n" " .set pop \n" : [old] "=&r" (old), diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index e59cb6246f76314031dff911a0ff8714b7326177..a25e0b73ee7042c761b3c02b2cefde9b885c4e50 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -414,6 +414,24 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, return -ENOIOCTLCMD; } +/* + * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while + * the vCPU is running. + * + * This must be noinstr as instrumentation may make use of RCU, and this is not + * safe during the EQS. + */ +static int noinstr kvm_mips_vcpu_enter_exit(struct kvm_vcpu *vcpu) +{ + int ret; + + guest_state_enter_irqoff(); + ret = kvm_mips_callbacks->vcpu_run(vcpu); + guest_state_exit_irqoff(); + + return ret; +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { int r = -EINTR; @@ -434,7 +452,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) lose_fpu(1); local_irq_disable(); - guest_enter_irqoff(); + guest_timing_enter_irqoff(); trace_kvm_enter(vcpu); /* @@ -445,10 +463,23 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) */ smp_store_mb(vcpu->mode, IN_GUEST_MODE); - r = kvm_mips_callbacks->vcpu_run(vcpu); + r = kvm_mips_vcpu_enter_exit(vcpu); + + /* + * We must ensure that any pending interrupts are taken before + * we exit guest timing so that timer ticks are accounted as + * guest time. Transiently unmask interrupts so that any + * pending interrupts are taken. + * + * TODO: is there a barrier which ensures that pending interrupts are + * recognised? Currently this just hopes that the CPU takes any pending + * interrupts between the enable and disable. + */ + local_irq_enable(); + local_irq_disable(); trace_kvm_out(vcpu); - guest_exit_irqoff(); + guest_timing_exit_irqoff(); local_irq_enable(); out: @@ -1168,7 +1199,7 @@ static void kvm_mips_set_c0_status(void) /* * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) */ -int kvm_mips_handle_exit(struct kvm_vcpu *vcpu) +static int __kvm_mips_handle_exit(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; u32 cause = vcpu->arch.host_cp0_cause; @@ -1357,6 +1388,17 @@ int kvm_mips_handle_exit(struct kvm_vcpu *vcpu) return ret; } +int noinstr kvm_mips_handle_exit(struct kvm_vcpu *vcpu) +{ + int ret; + + guest_state_exit_irqoff(); + ret = __kvm_mips_handle_exit(vcpu); + guest_state_enter_irqoff(); + + return ret; +} + /* Enable FPU for guest and restore context */ void kvm_own_fpu(struct kvm_vcpu *vcpu) { diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index 4adca5abbc72d5a98ee863a0821746e63aa16889..c706f5890a05c6749ff13a4d0b68b7f541d85aa7 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -458,8 +458,8 @@ void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu) /** * _kvm_vz_save_htimer() - Switch to software emulation of guest timer. * @vcpu: Virtual CPU. - * @compare: Pointer to write compare value to. - * @cause: Pointer to write cause value to. + * @out_compare: Pointer to write compare value to. + * @out_cause: Pointer to write cause value to. 
* * Save VZ guest timer state and switch to software emulation of guest CP0 * timer. The hard timer must already be in use, so preemption should be @@ -1541,11 +1541,14 @@ static int kvm_trap_vz_handle_guest_exit(struct kvm_vcpu *vcpu) } /** - * kvm_trap_vz_handle_cop_unusuable() - Guest used unusable coprocessor. + * kvm_trap_vz_handle_cop_unusable() - Guest used unusable coprocessor. * @vcpu: Virtual CPU context. * * Handle when the guest attempts to use a coprocessor which hasn't been allowed * by the root context. + * + * Return: value indicating whether to resume the host or the guest + * (RESUME_HOST or RESUME_GUEST) */ static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu) { @@ -1592,6 +1595,9 @@ static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu) * * Handle when the guest attempts to use MSA when it is disabled in the root * context. + * + * Return: value indicating whether to resume the host or the guest + * (RESUME_HOST or RESUME_GUEST) */ static int kvm_trap_vz_handle_msa_disabled(struct kvm_vcpu *vcpu) { diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index a46db080719531b1004d354c5627e7aa79ba77c5..7767137c3e49ad35690451002a9078704e190557 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S @@ -347,7 +347,7 @@ EXPORT_SYMBOL(csum_partial) .if \mode == LEGACY_MODE; \ 9: insn reg, addr; \ .section __ex_table,"a"; \ - PTR 9b, .L_exc; \ + PTR_WD 9b, .L_exc; \ .previous; \ /* This is enabled in EVA mode */ \ .else; \ @@ -356,7 +356,7 @@ EXPORT_SYMBOL(csum_partial) ((\to == USEROP) && (type == ST_INSN)); \ 9: __BUILD_EVA_INSN(insn##e, reg, addr); \ .section __ex_table,"a"; \ - PTR 9b, .L_exc; \ + PTR_WD 9b, .L_exc; \ .previous; \ .else; \ /* EVA without exception */ \ diff --git a/arch/mips/lib/memcpy.S b/arch/mips/lib/memcpy.S index 277c32296636da5654d73aab3f1d99c713337589..18a43f2e29c81a797635608d07f3c458bffe2dd9 100644 --- a/arch/mips/lib/memcpy.S +++ b/arch/mips/lib/memcpy.S @@ -116,7 +116,7 @@ .if \mode == LEGACY_MODE; \ 9: insn reg, addr; \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous; \ /* This is assembled in EVA mode */ \ .else; \ @@ -125,7 +125,7 @@ ((\to == USEROP) && (type == ST_INSN)); \ 9: __BUILD_EVA_INSN(insn##e, reg, addr); \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous; \ .else; \ /* \ diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S index b0baa3c79fad0403033b948d22ddcfaa7feeaf6b..0b342bae9a98c13c63e06f8618288232885c2715 100644 --- a/arch/mips/lib/memset.S +++ b/arch/mips/lib/memset.S @@ -52,7 +52,7 @@ 9: ___BUILD_EVA_INSN(insn, reg, addr); \ .endif; \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous .macro f_fill64 dst, offset, val, fixup, mode diff --git a/arch/mips/lib/strncpy_user.S b/arch/mips/lib/strncpy_user.S index 556acf684d7beaefe05c69d967c9c4f5363fffdc..13aaa9927ad124957fbc2329229771c2641b1bd1 100644 --- a/arch/mips/lib/strncpy_user.S +++ b/arch/mips/lib/strncpy_user.S @@ -15,7 +15,7 @@ #define EX(insn,reg,addr,handler) \ 9: insn reg, addr; \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous /* @@ -59,7 +59,7 @@ LEAF(__strncpy_from_user_asm) jr ra .section __ex_table,"a" - PTR 1b, .Lfault + PTR_WD 1b, .Lfault .previous EXPORT_SYMBOL(__strncpy_from_user_asm) diff --git a/arch/mips/lib/strnlen_user.S b/arch/mips/lib/strnlen_user.S index 92b63f20ec05fd71a13ea8d7c905d14451e105b1..6de31b616f9c1a808ea26311c3e9d41abea71866 100644 --- 
a/arch/mips/lib/strnlen_user.S +++ b/arch/mips/lib/strnlen_user.S @@ -14,7 +14,7 @@ #define EX(insn,reg,addr,handler) \ 9: insn reg, addr; \ .section __ex_table,"a"; \ - PTR 9b, handler; \ + PTR_WD 9b, handler; \ .previous /* diff --git a/arch/mips/loongson64/vbios_quirk.c b/arch/mips/loongson64/vbios_quirk.c index 9a29e94d3db1d072b795bd3f512d4aa5a1adf61f..3115d4de982c5c5d337caf6ea0a71cc077e34427 100644 --- a/arch/mips/loongson64/vbios_quirk.c +++ b/arch/mips/loongson64/vbios_quirk.c @@ -3,7 +3,7 @@ #include #include -static void pci_fixup_radeon(struct pci_dev *pdev) +static void pci_fixup_video(struct pci_dev *pdev) { struct resource *res = &pdev->resource[PCI_ROM_RESOURCE]; @@ -22,8 +22,7 @@ static void pci_fixup_radeon(struct pci_dev *pdev) res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | IORESOURCE_PCI_FIXED; - dev_info(&pdev->dev, "BAR %d: assigned %pR for Radeon ROM\n", - PCI_ROM_RESOURCE, res); + dev_info(&pdev->dev, "Video device with shadowed ROM at %pR\n", res); } -DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, 0x9615, - PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_radeon); +DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_VENDOR_ID_ATI, 0x9615, + PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); diff --git a/arch/openrisc/include/asm/bitops.h b/arch/openrisc/include/asm/bitops.h index 7f1ca35213d8ce5c35dc38d5dda816d1d8678ce0..d773ed938acb4e1c405e96059f9924ab46cd5949 100644 --- a/arch/openrisc/include/asm/bitops.h +++ b/arch/openrisc/include/asm/bitops.h @@ -30,7 +30,6 @@ #include #include #include -#include #ifndef _LINUX_BITOPS_H #error only can be included directly diff --git a/arch/parisc/include/asm/bitops.h b/arch/parisc/include/asm/bitops.h index daa2afd974fbf1d07fba1be7d010bdd7fc087c51..56ffd260c669b7d9ce50838edb07acb1de004600 100644 --- a/arch/parisc/include/asm/bitops.h +++ b/arch/parisc/include/asm/bitops.h @@ -12,6 +12,14 @@ #include #include +/* compiler build environment sanity checks: */ +#if !defined(CONFIG_64BIT) && defined(__LP64__) +#error "Please use 'ARCH=parisc' to build the 32-bit kernel." +#endif +#if defined(CONFIG_64BIT) && !defined(__LP64__) +#error "Please use 'ARCH=parisc64' to build the 64-bit kernel." +#endif + /* See http://marc.theaimsgroup.com/?t=108826637900003 for discussion * on use of volatile and __*_bit() (set/clear/change): * *_bit() want use of volatile. 
@@ -203,7 +211,6 @@ static __inline__ int fls(unsigned int x) #include #include #include -#include #include #include diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index b669f4b9040bfa81dac5acbf8ac76221259be268..3a3d0543840827c5039fbbe5d623dde4b8d19639 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -289,6 +289,7 @@ extern int _parisc_requires_coherency; extern int running_on_qemu; +extern void __noreturn toc_intr(struct pt_regs *regs); extern void toc_handler(void); extern unsigned int toc_handler_size; extern unsigned int toc_handler_csum; diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index ebf8a845b01742f1fe75c8cace45b6e2ef4373ce..123d5f16cd9db2381c95247e20dc5dc18a78d33a 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -89,8 +89,8 @@ struct exception_table_entry { __asm__("1: " ldx " 0(" sr "%2),%0\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - : "=r"(__gu_val), "=r"(__gu_err) \ - : "r"(ptr), "1"(__gu_err)); \ + : "=r"(__gu_val), "+r"(__gu_err) \ + : "r"(ptr)); \ \ (val) = (__force __typeof__(*(ptr))) __gu_val; \ } @@ -123,8 +123,8 @@ struct exception_table_entry { "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ - : "=&r"(__gu_tmp.l), "=r"(__gu_err) \ - : "r"(ptr), "1"(__gu_err)); \ + : "=&r"(__gu_tmp.l), "+r"(__gu_err) \ + : "r"(ptr)); \ \ (val) = __gu_tmp.t; \ } @@ -135,13 +135,12 @@ struct exception_table_entry { #define __put_user_internal(sr, x, ptr) \ ({ \ ASM_EXCEPTIONTABLE_VAR(__pu_err); \ - __typeof__(*(ptr)) __x = (__typeof__(*(ptr)))(x); \ \ switch (sizeof(*(ptr))) { \ - case 1: __put_user_asm(sr, "stb", __x, ptr); break; \ - case 2: __put_user_asm(sr, "sth", __x, ptr); break; \ - case 4: __put_user_asm(sr, "stw", __x, ptr); break; \ - case 8: STD_USER(sr, __x, ptr); break; \ + case 1: __put_user_asm(sr, "stb", x, ptr); break; \ + case 2: __put_user_asm(sr, "sth", x, ptr); break; \ + case 4: __put_user_asm(sr, "stw", x, ptr); break; \ + case 8: STD_USER(sr, x, ptr); break; \ default: BUILD_BUG(); \ } \ \ @@ -150,7 +149,9 @@ struct exception_table_entry { #define __put_user(x, ptr) \ ({ \ - __put_user_internal("%%sr3,", x, ptr); \ + __typeof__(&*(ptr)) __ptr = ptr; \ + __typeof__(*(__ptr)) __x = (__typeof__(*(__ptr)))(x); \ + __put_user_internal("%%sr3,", __x, __ptr); \ }) #define __put_kernel_nofault(dst, src, type, err_label) \ @@ -180,8 +181,8 @@ struct exception_table_entry { "1: " stx " %2,0(" sr "%1)\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ - : "=r"(__pu_err) \ - : "r"(ptr), "r"(x), "0"(__pu_err)) + : "+r"(__pu_err) \ + : "r"(ptr), "r"(x)) #if !defined(CONFIG_64BIT) @@ -193,8 +194,8 @@ struct exception_table_entry { "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ - : "=r"(__pu_err) \ - : "r"(ptr), "r"(__val), "0"(__pu_err)); \ + : "+r"(__pu_err) \ + : "r"(ptr), "r"(__val)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index cceb09855e03f8d8234a819519c5dd035f3d3067..b91cb45ffd4e3eaf4afee92c2f60efbc92f2f8e3 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -48,6 +48,7 @@ struct proc_dir_entry * proc_mckinley_root __read_mostly = NULL; void __init setup_cmdline(char **cmdline_p) { extern unsigned int boot_args[]; + char *p; /* Collect stuff passed in from the boot loader */ @@ -56,9 +57,19 @@ void 
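/*
 * Editor's note, not part of the patch: the parisc uaccess hunks above
 * replace an output operand tied to an input through a matching
 * constraint ("=r"(err) ... "0"(err)) with a single read-write operand
 * ("+r"(err)). Both tell the compiler the register is read and written;
 * the "+r" form is shorter and cannot drift out of sync when operands
 * are added or reordered. Architecture-neutral sketch:
 */
static inline int constraint_demo(int err)
{
	asm volatile("" : "=r"(err) : "0"(err));	/* old style */
	asm volatile("" : "+r"(err));			/* new style */

	return err;
}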
__init setup_cmdline(char **cmdline_p) /* called from hpux boot loader */ boot_command_line[0] = '\0'; } else { - strlcpy(boot_command_line, (char *)__va(boot_args[1]), + strscpy(boot_command_line, (char *)__va(boot_args[1]), COMMAND_LINE_SIZE); + /* autodetect console type (if not done by palo yet) */ + p = boot_command_line; + if (!str_has_prefix(p, "console=") && !strstr(p, " console=")) { + strlcat(p, " console=", COMMAND_LINE_SIZE); + if (PAGE0->mem_cons.cl_class == CL_DUPLEX) + strlcat(p, "ttyS0", COMMAND_LINE_SIZE); + else + strlcat(p, "tty0", COMMAND_LINE_SIZE); + } + #ifdef CONFIG_BLK_DEV_INITRD if (boot_args[2] != 0) /* did palo pass us a ramdisk? */ { @@ -68,7 +79,7 @@ void __init setup_cmdline(char **cmdline_p) #endif } - strcpy(command_line, boot_command_line); + strscpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; } diff --git a/arch/parisc/kernel/toc.c b/arch/parisc/kernel/toc.c index be9a0bebe61e7395d218310cf8dec1536573ac4e..e4b48d07afbdd2c571d75e55824ccc45bd2dd46a 100644 --- a/arch/parisc/kernel/toc.c +++ b/arch/parisc/kernel/toc.c @@ -10,9 +10,10 @@ #include #include #include +#include static unsigned int __aligned(16) toc_lock = 1; -DEFINE_PER_CPU_PAGE_ALIGNED(char [16384], toc_stack); +DEFINE_PER_CPU_PAGE_ALIGNED(char [16384], toc_stack) __visible; static void toc20_to_pt_regs(struct pt_regs *regs, struct pdc_toc_pim_20 *toc) { diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c index 367f6397bda7a4c05f0d577047e4a0d15df01859..86038505808573ce7bd45d306912b262177c2a9f 100644 --- a/arch/parisc/lib/iomap.c +++ b/arch/parisc/lib/iomap.c @@ -346,6 +346,16 @@ u64 ioread64be(const void __iomem *addr) return *((u64 *)addr); } +u64 ioread64_lo_hi(const void __iomem *addr) +{ + u32 low, high; + + low = ioread32(addr); + high = ioread32(addr + sizeof(u32)); + + return low + ((u64)high << 32); +} + u64 ioread64_hi_lo(const void __iomem *addr) { u32 low, high; @@ -419,6 +429,12 @@ void iowrite64be(u64 datum, void __iomem *addr) } } +void iowrite64_lo_hi(u64 val, void __iomem *addr) +{ + iowrite32(val, addr); + iowrite32(val >> 32, addr + sizeof(u32)); +} + void iowrite64_hi_lo(u64 val, void __iomem *addr) { iowrite32(val >> 32, addr + sizeof(u32)); @@ -530,6 +546,7 @@ EXPORT_SYMBOL(ioread32); EXPORT_SYMBOL(ioread32be); EXPORT_SYMBOL(ioread64); EXPORT_SYMBOL(ioread64be); +EXPORT_SYMBOL(ioread64_lo_hi); EXPORT_SYMBOL(ioread64_hi_lo); EXPORT_SYMBOL(iowrite8); EXPORT_SYMBOL(iowrite16); @@ -538,6 +555,7 @@ EXPORT_SYMBOL(iowrite32); EXPORT_SYMBOL(iowrite32be); EXPORT_SYMBOL(iowrite64); EXPORT_SYMBOL(iowrite64be); +EXPORT_SYMBOL(iowrite64_lo_hi); EXPORT_SYMBOL(iowrite64_hi_lo); EXPORT_SYMBOL(ioread8_rep); EXPORT_SYMBOL(ioread16_rep); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 1ae31db9988f5635413e60a0e295704915ce78e5..1dc2e88e7b04fc60eb80c65bc99887e415d2cec0 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -337,9 +337,9 @@ static void __init setup_bootmem(void) static bool kernel_set_to_readonly; -static void __init map_pages(unsigned long start_vaddr, - unsigned long start_paddr, unsigned long size, - pgprot_t pgprot, int force) +static void __ref map_pages(unsigned long start_vaddr, + unsigned long start_paddr, unsigned long size, + pgprot_t pgprot, int force) { pmd_t *pmd; pte_t *pg_table; @@ -449,7 +449,7 @@ void __init set_kernel_text_rw(int enable_read_write) flush_tlb_all(); } -void __ref free_initmem(void) +void free_initmem(void) { unsigned long init_begin = (unsigned long)__init_begin; unsigned 
long init_end = (unsigned long)__init_end; @@ -463,7 +463,6 @@ void __ref free_initmem(void) /* The init text pages are marked R-X. We have to * flush the icache and mark them RW- * - * This is tricky, because map_pages is in the init section. * Do a dummy remap of the data section first (the data * section is already PAGE_KERNEL) to pull in the TLB entries * for map_kernel */ diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index a05d8c62cbea81cbc3504811e8989bce3fe69d69..ea5d27dda8cf4aa6077202fa3df2308f3a0de672 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -328,8 +328,6 @@ unsigned long __arch_hweight64(__u64 w); #include #endif -#include - /* wrappers that deal with KASAN instrumentation */ #include #include diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index 7be27862329fc4245e55c33828423b7d87b90f59..78c6a5fde1d615eb1807047f607ebc630aa5a1af 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -223,6 +223,8 @@ static __always_inline void update_user_segments(u32 val) update_user_segment(15, val); } +int __init find_free_bat(void); +unsigned int bat_block_size(unsigned long base, unsigned long top); #endif /* !__ASSEMBLY__ */ /* We happily ignore the smaller BATs on 601, we don't actually use diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 609c80f671943b540a6c7352ec2619b68566a1b6..f8b94f78403f192b94d11ebb7cb9720376831008 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -178,6 +178,7 @@ static inline bool pte_user(pte_t pte) #ifndef __ASSEMBLY__ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 33e073d6b0c410afa9ab756c4963e9d65fc35524..875730d5af408933023a914fd5506a9e812786ec 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1082,6 +1082,8 @@ static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t p return hash__map_kernel_page(ea, pa, prot); } +void unmap_kernel_page(unsigned long va); + static inline int __meminit vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys) diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 947b5b9c44241124ef3c00816041127d821a05e8..a832aeafe560153535cc486f4ceee0d33a9547a4 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -111,8 +111,10 @@ static inline void __set_fixmap(enum fixed_addresses idx, BUILD_BUG_ON(idx >= __end_of_fixed_addresses); else if (WARN_ON(idx >= __end_of_fixed_addresses)) return; - - map_kernel_page(__fix_to_virt(idx), phys, flags); + if (pgprot_val(flags)) + map_kernel_page(__fix_to_virt(idx), phys, flags); + else + unmap_kernel_page(__fix_to_virt(idx)); } #define __early_set_fixmap __set_fixmap diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index a58fb4aa6c81d68b7188cb36a900f59189c7c39c..674e5aaafcbda522b5e83b48ce145397accf68be 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -473,7 +473,7 @@ static inline bool arch_irq_disabled_regs(struct pt_regs 
*regs) return !(regs->msr & MSR_EE); } -static inline bool should_hard_irq_enable(void) +static __always_inline bool should_hard_irq_enable(void) { return false; } diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index fe07558173ef4f212174f38792aa993ccb50d539..827038a33064b9b5703d617bdb23dc8cf1e6b04e 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -39,7 +39,6 @@ struct kvm_nested_guest { pgd_t *shadow_pgtable; /* our page table for this guest */ u64 l1_gr_to_hr; /* L1's addr of part'n-scoped table */ u64 process_table; /* process table entry for this guest */ - u64 hfscr; /* HFSCR that the L1 requested for this nested guest */ long refcnt; /* number of pointers to this struct */ struct mutex tlb_lock; /* serialize page faults and tlbies */ struct kvm_nested_guest *next; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index a770443cd6e01feb148ee563fb45ed7c6954f87b..d9bf60bf081617c7c3a7f5fbaa1527cb3f6e70b8 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -818,6 +818,7 @@ struct kvm_vcpu_arch { /* For support of nested guests */ struct kvm_nested_guest *nested; + u64 nested_hfscr; /* HFSCR that the L1 requested for the nested guest */ u32 nested_vcpu_id; gpa_t nested_io_gpr; #endif diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index b67742e2a9b22910e3828e19ddfdd9a2b90d3e76..d959c2a73fbf4c839b923644e4a31446d3c74734 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -64,6 +64,7 @@ extern int icache_44x_need_flush; #ifndef __ASSEMBLY__ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index a3313e853e5e8bb38c293302a31ec35e9830c5f9..2816d158280ade8adf22d8a9451b6e93f13b02c1 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -308,6 +308,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define __swp_entry_to_pte(x) __pte((x).val) int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); extern int __meminit vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys); diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index efad07081cc0e59612c872d67d43854cc2e9d6b0..9675303b724e93aab9c7ac132452a3b481bbc1f0 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -500,6 +500,7 @@ #define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) #define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_LWBRX(r, base, b) (0x7c00042c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_RAW_LDBRX(r, base, b) (0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_RAW_STWCX(s, a, b) (0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_CMPWI(a, i) (0x2c000000 | ___PPC_RA(a) | IMM_L(i)) diff --git a/arch/powerpc/include/asm/syscall.h 
b/arch/powerpc/include/asm/syscall.h index 52d05b465e3ec96cd4de0128c4cf96d04d3c6bb6..25fc8ad9a27afaba9b17b8cc74b82fb7caa3c5f8 100644 --- a/arch/powerpc/include/asm/syscall.h +++ b/arch/powerpc/include/asm/syscall.h @@ -90,7 +90,7 @@ static inline void syscall_get_arguments(struct task_struct *task, unsigned long val, mask = -1UL; unsigned int n = 6; - if (is_32bit_task()) + if (is_tsk_32bit_task(task)) mask = 0xffffffff; while (n--) { @@ -105,7 +105,7 @@ static inline void syscall_get_arguments(struct task_struct *task, static inline int syscall_get_arch(struct task_struct *task) { - if (is_32bit_task()) + if (is_tsk_32bit_task(task)) return AUDIT_ARCH_PPC; else if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) return AUDIT_ARCH_PPC64LE; diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index 5725029aaa2957eef73c9c3a92429fff441cb5d7..d6e649b3c70b679ed22cafc21130c5cc1ed42b58 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -168,8 +168,10 @@ static inline bool test_thread_local_flags(unsigned int flags) #ifdef CONFIG_COMPAT #define is_32bit_task() (test_thread_flag(TIF_32BIT)) +#define is_tsk_32bit_task(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT)) #else #define is_32bit_task() (IS_ENABLED(CONFIG_PPC32)) +#define is_tsk_32bit_task(tsk) (IS_ENABLED(CONFIG_PPC32)) #endif #if defined(CONFIG_PPC64) diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S index 92088f848266e7359c6cb327f3de988c061cafc8..7bab2d7de372e0ab24a0f63d5293589762131dbf 100644 --- a/arch/powerpc/kernel/interrupt_64.S +++ b/arch/powerpc/kernel/interrupt_64.S @@ -30,6 +30,7 @@ COMPAT_SYS_CALL_TABLE: .ifc \srr,srr mfspr r11,SPRN_SRR0 ld r12,_NIP(r1) + clrrdi r11,r11,2 clrrdi r12,r12,2 100: tdne r11,r12 EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) @@ -40,6 +41,7 @@ COMPAT_SYS_CALL_TABLE: .else mfspr r11,SPRN_HSRR0 ld r12,_NIP(r1) + clrrdi r11,r11,2 clrrdi r12,r12,2 100: tdne r11,r12 EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE) diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index 877817471e3c12f657e18aab19e49c654674993c..6a029f2378e1833ed06406a4f16bfc95bf46d382 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -25,7 +25,7 @@ static ssize_t page_map_read( struct file *file, char __user *buf, size_t nbytes loff_t *ppos) { return simple_read_from_buffer(buf, nbytes, ppos, - PDE_DATA(file_inode(file)), PAGE_SIZE); + pde_data(file_inode(file)), PAGE_SIZE); } static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) @@ -34,7 +34,7 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) return -EINVAL; remap_pfn_range(vma, vma->vm_start, - __pa(PDE_DATA(file_inode(file))) >> PAGE_SHIFT, + __pa(pde_data(file_inode(file))) >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot); return 0; } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 62361cc7281cd74f2fe5aab0e6df36e0098cdaba..cd0b8b71ecddc15b9250b326b1e03a36b26e96c2 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -649,8 +649,9 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) __this_cpu_inc(irq_stat.timer_irqs_event); } else { now = *next_tb - now; - if (now <= decrementer_max) - set_dec_or_work(now); + if (now > decrementer_max) + now = decrementer_max; + set_dec_or_work(now); __this_cpu_inc(irq_stat.timer_irqs_others); } diff --git 
a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index d1817cd9a691f4c1634d53433a75fd57694c4ab3..84c89f08ae9aa97eb04b8de6b91f491dedf0592a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1816,7 +1816,6 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu, static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) { - struct kvm_nested_guest *nested = vcpu->arch.nested; int r; int srcu_idx; @@ -1922,7 +1921,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) * it into a HEAI. */ if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) || - (nested->hfscr & (1UL << cause))) { + (vcpu->arch.nested_hfscr & (1UL << cause))) { vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST; /* diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 8f8daaeeb3b75b95b5434e8e187bd06b271aab3f..9d373f8963ee98a9977f4a796f200c6343b5d67b 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -363,7 +363,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) /* set L1 state to L2 state */ vcpu->arch.nested = l2; vcpu->arch.nested_vcpu_id = l2_hv.vcpu_token; - l2->hfscr = l2_hv.hfscr; + vcpu->arch.nested_hfscr = l2_hv.hfscr; vcpu->arch.regs = l2_regs; /* Guest must always run with ME enabled, HV disabled. */ diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 94045b265b6b9f5ad6adcc25356be71e5fb4ac08..203735caf6915ad6b5f49161f6a82e7e48d836cd 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -76,7 +76,7 @@ unsigned long p_block_mapped(phys_addr_t pa) return 0; } -static int __init find_free_bat(void) +int __init find_free_bat(void) { int b; int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; @@ -100,7 +100,7 @@ static int __init find_free_bat(void) * - block size has to be a power of two. This is calculated by finding the * highest bit set to 1. 
*/ -static unsigned int block_size(unsigned long base, unsigned long top) +unsigned int bat_block_size(unsigned long base, unsigned long top) { unsigned int max_size = SZ_256M; unsigned int base_shift = (ffs(base) - 1) & 31; @@ -145,7 +145,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to int idx; while ((idx = find_free_bat()) != -1 && base != top) { - unsigned int size = block_size(base, top); + unsigned int size = bat_block_size(base, top); if (size < 128 << 10) break; @@ -201,12 +201,12 @@ void mmu_mark_initmem_nx(void) unsigned long size; for (i = 0; i < nb - 1 && base < top;) { - size = block_size(base, top); + size = bat_block_size(base, top); setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); base += size; } if (base < top) { - size = block_size(base, top); + size = bat_block_size(base, top); if ((top - base) > size) { size <<= 1; if (strict_kernel_rwx_enabled() && base + size > border) diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c index 35b287b0a8da4e2a6d7cf6d15a215ce820ea2c13..450a67ef0bbe1eb4fc4f897e897f1ff24727f44c 100644 --- a/arch/powerpc/mm/kasan/book3s_32.c +++ b/arch/powerpc/mm/kasan/book3s_32.c @@ -10,48 +10,51 @@ int __init kasan_init_region(void *start, size_t size) { unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); - unsigned long k_cur = k_start; - int k_size = k_end - k_start; - int k_size_base = 1 << (ffs(k_size) - 1); + unsigned long k_nobat = k_start; + unsigned long k_cur; + phys_addr_t phys; int ret; - void *block; - block = memblock_alloc(k_size, k_size_base); - - if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) { - int shift = ffs(k_size - k_size_base); - int k_size_more = shift ? 
1 << (shift - 1) : 0; - - setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL); - if (k_size_more >= SZ_128K) - setbat(-1, k_start + k_size_base, __pa(block) + k_size_base, - k_size_more, PAGE_KERNEL); - if (v_block_mapped(k_start)) - k_cur = k_start + k_size_base; - if (v_block_mapped(k_start + k_size_base)) - k_cur = k_start + k_size_base + k_size_more; - - update_bats(); + while (k_nobat < k_end) { + unsigned int k_size = bat_block_size(k_nobat, k_end); + int idx = find_free_bat(); + + if (idx == -1) + break; + if (k_size < SZ_128K) + break; + phys = memblock_phys_alloc_range(k_size, k_size, 0, + MEMBLOCK_ALLOC_ANYWHERE); + if (!phys) + break; + + setbat(idx, k_nobat, phys, k_size, PAGE_KERNEL); + k_nobat += k_size; } + if (k_nobat != k_start) + update_bats(); - if (!block) - block = memblock_alloc(k_size, PAGE_SIZE); - if (!block) - return -ENOMEM; + if (k_nobat < k_end) { + phys = memblock_phys_alloc_range(k_end - k_nobat, PAGE_SIZE, 0, + MEMBLOCK_ALLOC_ANYWHERE); + if (!phys) + return -ENOMEM; + } ret = kasan_init_shadow_page_tables(k_start, k_end); if (ret) return ret; - kasan_update_early_region(k_start, k_cur, __pte(0)); + kasan_update_early_region(k_start, k_nobat, __pte(0)); - for (; k_cur < k_end; k_cur += PAGE_SIZE) { + for (k_cur = k_nobat; k_cur < k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_off_k(k_cur); - void *va = block + k_cur - k_start; - pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); + pte_t pte = pfn_pte(PHYS_PFN(phys + k_cur - k_nobat), PAGE_KERNEL); __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); } flush_tlb_kernel_range(k_start, k_end); + memset(kasan_mem_to_shadow(start), 0, k_end - k_start); + return 0; } diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index abb3198bd277ba28ee96f686a1fa9d81aa18adf7..6ec5a7dd79137cea25470fde53cc80ff2ff020df 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -206,6 +206,15 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, __set_pte_at(mm, addr, ptep, pte, 0); } +void unmap_kernel_page(unsigned long va) +{ + pmd_t *pmdp = pmd_off_k(va); + pte_t *ptep = pte_offset_kernel(pmdp, va); + + pte_clear(&init_mm, va, ptep); + flush_tlb_kernel_range(va, va + PAGE_SIZE); +} + /* * This is called when relaxing access to a PTE. It's also called in the page * fault path when we don't hit any of the major fault cases, ie, a minor diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index d6ffdd0f2309d0bbfb06fdc7c94d8490502db093..56dd1f4e3e4447ba79ce39802ad076caf8855a96 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -23,15 +23,15 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int size) memset32(area, BREAKPOINT_INSTRUCTION, size / 4); } -/* Fix the branch target addresses for subprog calls */ -static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, - struct codegen_context *ctx, u32 *addrs) +/* Fix updated addresses (for subprog calls, ldimm64, et al) during extra pass */ +static int bpf_jit_fixup_addresses(struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, u32 *addrs) { const struct bpf_insn *insn = fp->insnsi; bool func_addr_fixed; u64 func_addr; u32 tmp_idx; - int i, ret; + int i, j, ret; for (i = 0; i < fp->len; i++) { /* @@ -66,6 +66,23 @@ static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, * of the JITed sequence remains unchanged. 
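 *
 * Editor's note, not part of the patch: a step back to the powerpc mm
 * hunks above. unmap_kernel_page() pairs with the earlier fixmap change,
 * so clearing a fixmap slot now removes the PTE and flushes the TLB
 * instead of installing a PTE with an empty pgprot. Via the generic
 * helpers from <asm-generic/fixmap.h> (FIX_DEMO is a hypothetical slot):
 *
 *	set_fixmap(FIX_DEMO, phys);	// __set_fixmap() -> map_kernel_page()
 *	clear_fixmap(FIX_DEMO);		// pgprot 0 -> unmap_kernel_page()
 *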
*/ ctx->idx = tmp_idx; + } else if (insn[i].code == (BPF_LD | BPF_IMM | BPF_DW)) { + tmp_idx = ctx->idx; + ctx->idx = addrs[i] / 4; +#ifdef CONFIG_PPC32 + PPC_LI32(ctx->b2p[insn[i].dst_reg] - 1, (u32)insn[i + 1].imm); + PPC_LI32(ctx->b2p[insn[i].dst_reg], (u32)insn[i].imm); + for (j = ctx->idx - addrs[i] / 4; j < 4; j++) + EMIT(PPC_RAW_NOP()); +#else + func_addr = ((u64)(u32)insn[i].imm) | (((u64)(u32)insn[i + 1].imm) << 32); + PPC_LI64(b2p[insn[i].dst_reg], func_addr); + /* overwrite rest with nops */ + for (j = ctx->idx - addrs[i] / 4; j < 5; j++) + EMIT(PPC_RAW_NOP()); +#endif + ctx->idx = tmp_idx; + i++; } } @@ -200,13 +217,13 @@ skip_init_ctx: /* * Do not touch the prologue and epilogue as they will remain * unchanged. Only fix the branch target address for subprog - * calls in the body. + * calls in the body, and ldimm64 instructions. * * This does not change the offsets and lengths of the subprog * call instruction sequences and hence, the size of the JITed * image as well. */ - bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); + bpf_jit_fixup_addresses(fp, code_base, &cgctx, addrs); /* There is no need to perform the usual passes. */ goto skip_codegen_passes; diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index faaebd446cadf2b4cc0c7fcce8167f4890965d87..cf8dd8aea386c4bb8d5366f51015102731bba82d 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -191,6 +191,9 @@ void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 fun if (image && rel < 0x2000000 && rel >= -0x2000000) { PPC_BL_ABS(func); + EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_NOP()); } else { /* Load function address into r0 */ EMIT(PPC_RAW_LIS(_R0, IMM_H(func))); @@ -290,6 +293,8 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * bool func_addr_fixed; u64 func_addr; u32 true_cond; + u32 tmp_idx; + int j; /* * addrs[] maps a BPF bytecode address into a real offset from @@ -905,8 +910,12 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * * 16 byte instruction that uses two 'struct bpf_insn' */ case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ + tmp_idx = ctx->idx; PPC_LI32(dst_reg_h, (u32)insn[i + 1].imm); PPC_LI32(dst_reg, (u32)insn[i].imm); + /* padding to allow full 4 instructions for later patching */ + for (j = ctx->idx - tmp_idx; j < 4; j++) + EMIT(PPC_RAW_NOP()); /* Adjust for two bpf instructions */ addrs[++i] = ctx->idx * 4; break; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 9eae8d8ed3402e13a5a53c837af7ab4f0600c7c1..e1e8c934308adbf0185de8dd54173672e3c9c6b0 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -319,6 +319,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context * u64 imm64; u32 true_cond; u32 tmp_idx; + int j; /* * addrs[] maps a BPF bytecode address into a real offset from @@ -633,17 +634,21 @@ bpf_alu32_trunc: EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); break; case 64: - /* - * Way easier and faster(?) 
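 *
 * Editor's note, not part of the patch: why the NOP padding added above
 * is needed. PPC_LI32()/PPC_LI64() emit a variable number of
 * instructions depending on the immediate value, but the extra JIT pass
 * patches immediates in place, so every BPF_LD | BPF_IMM | BPF_DW site
 * must occupy its worst-case slot (4 words for the ppc32 pair, 5 for
 * ppc64). Sketch, with emit_li64()/emit_nop() as hypothetical stand-ins
 * for the EMIT(PPC_RAW_*()) sequences:
 *
 *	start = ctx->idx;
 *	emit_li64(ctx, imm);		// variable length: 1..5 insns
 *	while (ctx->idx - start < 5)
 *		emit_nop(ctx);		// pad to a constant size
 *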
to store the value - * into stack and then use ldbrx - * - * ctx->seen will be reliable in pass2, but - * the instructions generated will remain the - * same across all passes - */ + /* Store the value to stack and then use byte-reverse loads */ PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx))); - EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); + if (cpu_has_feature(CPU_FTR_ARCH_206)) { + EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); + } else { + EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1])); + if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) + EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32)); + EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4)); + EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1])); + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) + EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2])); + } break; } break; @@ -848,9 +853,13 @@ emit_clear: case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ imm64 = ((u64)(u32) insn[i].imm) | (((u64)(u32) insn[i+1].imm) << 32); + tmp_idx = ctx->idx; + PPC_LI64(dst_reg, imm64); + /* padding to allow full 5 instructions for later patching */ + for (j = ctx->idx - tmp_idx; j < 5; j++) + EMIT(PPC_RAW_NOP()); /* Adjust for two bpf instructions */ addrs[++i] = ctx->idx * 4; - PPC_LI64(dst_reg, imm64); break; /* diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index a684901b6965756763349136227ed5faffc658ab..b5b42cf0a7039d46601026b173ff1869af2a1400 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -776,6 +776,34 @@ static void pmao_restore_workaround(bool ebb) mtspr(SPRN_PMC6, pmcs[5]); } +/* + * If the perf subsystem wants performance monitor interrupts as soon as + * possible (e.g., to sample the instruction address and stack chain), + * this should return true. The IRQ masking code can then enable MSR[EE] + * in some places (e.g., interrupt handlers) that allows PMI interrupts + * through to improve accuracy of profiles, at the cost of some performance. + * + * The PMU counters can be enabled by other means (e.g., sysfs raw SPR + * access), but in that case there is no need for prompt PMI handling. + * + * This currently returns true if any perf counter is being used. It + * could possibly return false if only events are being counted rather than + * samples being taken, but for now this is good enough. + */ +bool power_pmu_wants_prompt_pmi(void) +{ + struct cpu_hw_events *cpuhw; + + /* + * This could simply test local_paca->pmcregs_in_use if that were not + * under ifdef KVM. + */ + if (!ppmu) + return false; + + cpuhw = this_cpu_ptr(&cpu_hw_events); + return cpuhw->n_events; +} #endif /* CONFIG_PPC64 */ static void perf_event_interrupt(struct pt_regs *regs); @@ -1327,9 +1355,20 @@ static void power_pmu_disable(struct pmu *pmu) * Otherwise provide a warning if there is PMI pending, but * no counter is found overflown. */ - if (any_pmc_overflown(cpuhw)) - clear_pmi_irq_pending(); - else + if (any_pmc_overflown(cpuhw)) { + /* + * Since power_pmu_disable runs under local_irq_save, it + * could happen that code hits a PMC overflow without PMI + * pending in paca. Hence only clear PMI pending if it was + * set. + * + * If a PMI is pending, then MSR[EE] must be disabled (because + * the masked PMI handler disabling EE). So it is safe to + * call clear_pmi_irq_pending(). 
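 *
 * Editor's note, not part of the patch: back in the bpf_jit_comp64 hunk
 * above, the !CPU_FTR_ARCH_206 fallback composes a 64-bit byte swap from
 * two byte-reversed 32-bit loads (lwbrx), since ldbrx exists only from
 * ISA 2.06 (POWER7) on. The same composition in C, illustrative only:
 *
 *	static inline u64 bswap64_from_halves(u64 v)
 *	{
 *		u32 lo = (u32)v, hi = (u32)(v >> 32);
 *
 *		// the swapped low word becomes the high half and vice versa
 *		return ((u64)__builtin_bswap32(lo) << 32) |
 *		       __builtin_bswap32(hi);
 *	}
 *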
+ */ + if (pmi_irq_pending()) + clear_pmi_irq_pending(); + } else WARN_ON(pmi_irq_pending()); val = mmcra = cpuhw->mmcr.mmcra; @@ -2438,36 +2477,6 @@ static void perf_event_interrupt(struct pt_regs *regs) perf_sample_event_took(sched_clock() - start_clock); } -/* - * If the perf subsystem wants performance monitor interrupts as soon as - * possible (e.g., to sample the instruction address and stack chain), - * this should return true. The IRQ masking code can then enable MSR[EE] - * in some places (e.g., interrupt handlers) that allows PMI interrupts - * though to improve accuracy of profiles, at the cost of some performance. - * - * The PMU counters can be enabled by other means (e.g., sysfs raw SPR - * access), but in that case there is no need for prompt PMI handling. - * - * This currently returns true if any perf counter is being used. It - * could possibly return false if only events are being counted rather than - * samples being taken, but for now this is good enough. - */ -bool power_pmu_wants_prompt_pmi(void) -{ - struct cpu_hw_events *cpuhw; - - /* - * This could simply test local_paca->pmcregs_in_use if that were not - * under ifdef KVM. - */ - - if (!ppmu) - return false; - - cpuhw = this_cpu_ptr(&cpu_hw_events); - return cpuhw->n_events; -} - static int power_pmu_prepare_cpu(unsigned int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); diff --git a/arch/powerpc/platforms/pasemi/dma_lib.c b/arch/powerpc/platforms/pasemi/dma_lib.c index 270fa3c0d3724927cd912dc8a67ebe75abc26d80..26427311fc7255a8826c330a2bdf333d69f37423 100644 --- a/arch/powerpc/platforms/pasemi/dma_lib.c +++ b/arch/powerpc/platforms/pasemi/dma_lib.c @@ -375,7 +375,7 @@ int pasemi_dma_alloc_flag(void) int bit; retry: - bit = find_next_bit(flags_free, MAX_FLAGS, 0); + bit = find_first_bit(flags_free, MAX_FLAGS); if (bit >= MAX_FLAGS) return -ENOSPC; if (!test_and_clear_bit(bit, flags_free)) @@ -440,7 +440,7 @@ int pasemi_dma_alloc_fun(void) int bit; retry: - bit = find_next_bit(fun_free, MAX_FLAGS, 0); + bit = find_first_bit(fun_free, MAX_FLAGS); if (bit >= MAX_FLAGS) return -ENOSPC; if (!test_and_clear_bit(bit, fun_free)) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 171ecc6d1792ff3bfed997394b3f9a438d05492d..5adcbd9b5e88637f3e5d6fa562da6b15e7ec846a 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -147,27 +147,16 @@ config MMU Select if you want MMU-based virtualised addressing space support by paged memory management. If unsure, say 'Y'. 
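/*
 * Editor's note, not part of the patch: the pasemi hunks above are a
 * pure micro-optimization, since find_first_bit(map, size) is specified
 * to return the same result as find_next_bit(map, size, 0) while
 * skipping the offset handling. A minimal allocator in the same shape:
 */
static int demo_alloc_first_free(unsigned long *map, unsigned int size)
{
	unsigned int bit;

retry:
	bit = find_first_bit(map, size);	/* was find_next_bit(map, size, 0) */
	if (bit >= size)
		return -ENOSPC;			/* nothing free */
	if (!test_and_clear_bit(bit, map))
		goto retry;			/* lost a race, try again */

	return bit;
}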
-config VA_BITS - int - default 32 if 32BIT - default 39 if 64BIT - -config PA_BITS - int - default 34 if 32BIT - default 56 if 64BIT - config PAGE_OFFSET hex - default 0xC0000000 if 32BIT && MAXPHYSMEM_1GB + default 0xC0000000 if 32BIT default 0x80000000 if 64BIT && !MMU - default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB - default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB + default 0xffffaf8000000000 if 64BIT config KASAN_SHADOW_OFFSET hex depends on KASAN_GENERIC - default 0xdfffffc800000000 if 64BIT + default 0xdfffffff00000000 if 64BIT default 0xffffffff if 32BIT config ARCH_FLATMEM_ENABLE @@ -213,7 +202,7 @@ config FIX_EARLYCON_MEM config PGTABLE_LEVELS int - default 3 if 64BIT + default 4 if 64BIT default 2 config LOCKDEP_SUPPORT @@ -271,24 +260,6 @@ config MODULE_SECTIONS bool select HAVE_MOD_ARCH_SPECIFIC -choice - prompt "Maximum Physical Memory" - default MAXPHYSMEM_1GB if 32BIT - default MAXPHYSMEM_2GB if 64BIT && CMODEL_MEDLOW - default MAXPHYSMEM_128GB if 64BIT && CMODEL_MEDANY - - config MAXPHYSMEM_1GB - depends on 32BIT - bool "1GiB" - config MAXPHYSMEM_2GB - depends on 64BIT - bool "2GiB" - config MAXPHYSMEM_128GB - depends on 64BIT && CMODEL_MEDANY - bool "128GiB" -endchoice - - config SMP bool "Symmetric Multi-Processing" help @@ -392,12 +363,25 @@ source "kernel/Kconfig.hz" config RISCV_SBI_V01 bool "SBI v0.1 support" - default y depends on RISCV_SBI help This config allows kernel to use SBI v0.1 APIs. This will be deprecated in future once legacy M-mode software are no longer in use. +config RISCV_BOOT_SPINWAIT + bool "Spinwait booting method" + depends on SMP + default y + help + This enables support for booting Linux via spinwait method. In the + spinwait method, all cores randomly jump to Linux. One of the cores + gets chosen via lottery and all other keep spinning on a percpu + variable. This method cannot support CPU hotplug and sparse hartid + scheme. It should be only enabled for M-mode Linux or platforms relying + on older firmware without SBI HSM extension. All other platforms should + rely on ordered booting via SBI HSM extension which gets chosen + dynamically at runtime if the firmware supports it. + config KEXEC bool "Kexec system call" select KEXEC_CORE diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 8a107ed18b0dc71b7804203540078044cddd1976..7d81102cffd48e38e7c16c4782655d4653dc127b 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -50,6 +50,12 @@ riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c + +# Newer binutils versions default to ISA spec version 20191213 which moves some +# instructions from the I extension to the Zicsr and Zifencei extensions. 
+toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei) +riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei + KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y)) KBUILD_AFLAGS += -march=$(riscv-march-y) diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts index 6bfa1f24d3deb45e493a0d4bb8bc75b1f06b6ffa..c4ed9efdff0322070cd7dd8fd41aae5e218f4bbe 100644 --- a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts +++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts @@ -39,6 +39,11 @@ clock-frequency = ; clock-output-names = "rtcclk"; }; + + gpio-poweroff { + compatible = "gpio-poweroff"; + gpios = <&gpio 2 GPIO_ACTIVE_LOW>; + }; }; &uart0 { diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig index e8ceab678e8bb6a38e4400a7cbd9bbf401ff1a2f..3f42ed87dde80754447247417a0388860f2a00d5 100644 --- a/arch/riscv/configs/nommu_k210_defconfig +++ b/arch/riscv/configs/nommu_k210_defconfig @@ -29,7 +29,6 @@ CONFIG_EMBEDDED=y CONFIG_SLOB=y # CONFIG_MMU is not set CONFIG_SOC_CANAAN=y -CONFIG_MAXPHYSMEM_2GB=y CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_CMDLINE="earlycon console=ttySIF0" diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig index 46aa3879f19ce13486f780f81632ea5db9642e40..2a82a3b2992b2824c29efd1cd1902b6c9d3b2659 100644 --- a/arch/riscv/configs/nommu_k210_sdcard_defconfig +++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig @@ -21,7 +21,6 @@ CONFIG_EMBEDDED=y CONFIG_SLOB=y # CONFIG_MMU is not set CONFIG_SOC_CANAAN=y -CONFIG_MAXPHYSMEM_2GB=y CONFIG_SMP=y CONFIG_NR_CPUS=2 CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro" diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig index 385cca741b01673a13015217951370bd301a7288..e1c9864b62376dedd8c297e7b4a4a835953ab326 100644 --- a/arch/riscv/configs/nommu_virt_defconfig +++ b/arch/riscv/configs/nommu_virt_defconfig @@ -24,10 +24,8 @@ CONFIG_EXPERT=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_COMPAT_BRK is not set CONFIG_SLOB=y -# CONFIG_SLAB_MERGE_DEFAULT is not set # CONFIG_MMU is not set CONFIG_SOC_VIRT=y -CONFIG_MAXPHYSMEM_2GB=y CONFIG_SMP=y CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0" CONFIG_CMDLINE_FORCE=y diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 396a3303c537489bc8b1b7a52a9334e07b5ceda3..3540b690944bee8d5309a3d43346e3ecea47c361 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/arch/riscv/include/asm/cpu_ops.h b/arch/riscv/include/asm/cpu_ops.h index a8ec3c5c1bd265ddaff29d158d3c5ecf9e959309..134590f1b84359f47da63b838d1300d63d456aca 100644 --- a/arch/riscv/include/asm/cpu_ops.h +++ b/arch/riscv/include/asm/cpu_ops.h @@ -40,7 +40,5 @@ struct cpu_operations { extern const struct cpu_operations *cpu_ops[NR_CPUS]; void __init cpu_set_ops(int cpu); -void cpu_update_secondary_bootdata(unsigned int cpuid, - struct task_struct *tidle); #endif /* ifndef __ASM_CPU_OPS_H */ diff --git a/arch/riscv/include/asm/cpu_ops_sbi.h b/arch/riscv/include/asm/cpu_ops_sbi.h new file mode 100644 index 0000000000000000000000000000000000000000..56e4b76d09ff5317b17d08450b242c940908bea4 --- /dev/null +++ b/arch/riscv/include/asm/cpu_ops_sbi.h @@ -0,0 +1,25 @@ +/* 
SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021 by Rivos Inc. + */ +#ifndef __ASM_CPU_OPS_SBI_H +#define __ASM_CPU_OPS_SBI_H + +#ifndef __ASSEMBLY__ +#include +#include +#include + +/** + * struct sbi_hart_boot_data - Hart specific boot used during booting and + * cpu hotplug. + * @task_ptr: A pointer to the hart specific tp + * @stack_ptr: A pointer to the hart specific sp + */ +struct sbi_hart_boot_data { + void *task_ptr; + void *stack_ptr; +}; +#endif + +#endif /* ifndef __ASM_CPU_OPS_SBI_H */ diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 5046f431645cb828e18ec937b095860ccbf2f204..ae711692eec94961403ea546a1b79eda0bc50977 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -40,14 +40,13 @@ #ifndef CONFIG_64BIT #define SATP_PPN _AC(0x003FFFFF, UL) #define SATP_MODE_32 _AC(0x80000000, UL) -#define SATP_MODE SATP_MODE_32 #define SATP_ASID_BITS 9 #define SATP_ASID_SHIFT 22 #define SATP_ASID_MASK _AC(0x1FF, UL) #else #define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL) #define SATP_MODE_39 _AC(0x8000000000000000, UL) -#define SATP_MODE SATP_MODE_39 +#define SATP_MODE_48 _AC(0x9000000000000000, UL) #define SATP_ASID_BITS 16 #define SATP_ASID_SHIFT 44 #define SATP_ASID_MASK _AC(0xFFFF, UL) diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index 54cbf07fb4e9647a73d63cdc3c15595dc01e6b94..58a718573ad6db715c96d9a4ec5492c2506b2fb3 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -24,6 +24,7 @@ enum fixed_addresses { FIX_HOLE, FIX_PTE, FIX_PMD, + FIX_PUD, FIX_TEXT_POKE1, FIX_TEXT_POKE0, FIX_EARLYCON_MEM_BASE, diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h index b00f503ec1248a5eea682027ca4a5d673d105fae..0b85e363e778c9fb15fd4f1052049f54b4b1a4e4 100644 --- a/arch/riscv/include/asm/kasan.h +++ b/arch/riscv/include/asm/kasan.h @@ -27,13 +27,18 @@ */ #define KASAN_SHADOW_SCALE_SHIFT 3 -#define KASAN_SHADOW_SIZE (UL(1) << ((CONFIG_VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT)) -#define KASAN_SHADOW_START KERN_VIRT_START -#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) +#define KASAN_SHADOW_SIZE (UL(1) << ((VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT)) +/* + * Depending on the size of the virtual address space, the region may not be + * aligned on PGDIR_SIZE, so force its alignment to ease its population. + */ +#define KASAN_SHADOW_START ((KASAN_SHADOW_END - KASAN_SHADOW_SIZE) & PGDIR_MASK) +#define KASAN_SHADOW_END MODULES_LOWEST_VADDR #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) void kasan_init(void); asmlinkage void kasan_early_init(void); +void kasan_swapper_init(void); #endif #endif diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index b3e5ff0125fe48ce7398c60e14a241492aa9660d..160e3a1e8f8be72f0a12b9d24c0a9dab45b0e60d 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -31,9 +31,20 @@ * When not using MMU this corresponds to the first free page in * physical memory (aligned on a page boundary). */ +#ifdef CONFIG_64BIT +#ifdef CONFIG_MMU +#define PAGE_OFFSET kernel_map.page_offset +#else #define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) - -#define KERN_VIRT_SIZE (-PAGE_OFFSET) +#endif +/* + * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so + * define the PAGE_OFFSET value for SV39. 
+ */ +#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL) +#else +#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) +#endif /* CONFIG_64BIT */ #ifndef __ASSEMBLY__ @@ -86,6 +97,7 @@ extern unsigned long riscv_pfn_base; #endif /* CONFIG_MMU */ struct kernel_mapping { + unsigned long page_offset; unsigned long virt_addr; uintptr_t phys_addr; uintptr_t size; diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 0af6933a7100de2acd1659751176224024c6f1c9..11823004b87a30324805d5c8fd65f4256f166b52 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -11,6 +11,8 @@ #include #ifdef CONFIG_MMU +#define __HAVE_ARCH_PUD_ALLOC_ONE +#define __HAVE_ARCH_PUD_FREE #include static inline void pmd_populate_kernel(struct mm_struct *mm, @@ -36,6 +38,44 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); } + +static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) +{ + if (pgtable_l4_enabled) { + unsigned long pfn = virt_to_pfn(pud); + + set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); + } +} + +static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, + pud_t *pud) +{ + if (pgtable_l4_enabled) { + unsigned long pfn = virt_to_pfn(pud); + + set_p4d_safe(p4d, + __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE)); + } +} + +#define pud_alloc_one pud_alloc_one +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + if (pgtable_l4_enabled) + return __pud_alloc_one(mm, addr); + + return NULL; +} + +#define pud_free pud_free +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + if (pgtable_l4_enabled) + __pud_free(mm, pud); +} + +#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud) #endif /* __PAGETABLE_PMD_FOLDED */ static inline pgd_t *pgd_alloc(struct mm_struct *mm) diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 228261aa962868d3deec7776b7a17caff7cf1d32..bbbdd66e5e2fe351d3f653a36b275cb2db06b714 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -8,16 +8,36 @@ #include -#define PGDIR_SHIFT 30 +extern bool pgtable_l4_enabled; + +#define PGDIR_SHIFT_L3 30 +#define PGDIR_SHIFT_L4 39 +#define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3) + +#define PGDIR_SHIFT (pgtable_l4_enabled ? 
PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3) /* Size of region mapped by a page global directory */ #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) +/* pud is folded into pgd in case of 3-level page table */ +#define PUD_SHIFT 30 +#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE - 1)) + #define PMD_SHIFT 21 /* Size of region mapped by a page middle directory */ #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE - 1)) +/* Page Upper Directory entry */ +typedef struct { + unsigned long pud; +} pud_t; + +#define pud_val(x) ((x).pud) +#define __pud(x) ((pud_t) { (x) }) +#define PTRS_PER_PUD (PAGE_SIZE / sizeof(pud_t)) + /* Page Middle Directory entry */ typedef struct { unsigned long pmd; @@ -59,6 +79,16 @@ static inline void pud_clear(pud_t *pudp) set_pud(pudp, __pud(0)); } +static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot) +{ + return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); +} + +static inline unsigned long _pud_pfn(pud_t pud) +{ + return pud_val(pud) >> _PAGE_PFN_SHIFT; +} + static inline pmd_t *pud_pgtable(pud_t pud) { return (pmd_t *)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT); @@ -69,6 +99,17 @@ static inline struct page *pud_page(pud_t pud) return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT); } +#define mm_pud_folded mm_pud_folded +static inline bool mm_pud_folded(struct mm_struct *mm) +{ + if (pgtable_l4_enabled) + return false; + + return true; +} + +#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) + static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot) { return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); @@ -84,4 +125,69 @@ static inline unsigned long _pmd_pfn(pmd_t pmd) #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pud_ERROR(e) \ + pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e)) + +static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) +{ + if (pgtable_l4_enabled) + *p4dp = p4d; + else + set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) }); +} + +static inline int p4d_none(p4d_t p4d) +{ + if (pgtable_l4_enabled) + return (p4d_val(p4d) == 0); + + return 0; +} + +static inline int p4d_present(p4d_t p4d) +{ + if (pgtable_l4_enabled) + return (p4d_val(p4d) & _PAGE_PRESENT); + + return 1; +} + +static inline int p4d_bad(p4d_t p4d) +{ + if (pgtable_l4_enabled) + return !p4d_present(p4d); + + return 0; +} + +static inline void p4d_clear(p4d_t *p4d) +{ + if (pgtable_l4_enabled) + set_p4d(p4d, __p4d(0)); +} + +static inline pud_t *p4d_pgtable(p4d_t p4d) +{ + if (pgtable_l4_enabled) + return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT); + + return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) }); +} + +static inline struct page *p4d_page(p4d_t p4d) +{ + return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT); +} + +#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) + +#define pud_offset pud_offset +static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) +{ + if (pgtable_l4_enabled) + return p4d_pgtable(*p4d) + pud_index(address); + + return (pud_t *)p4d; +} + #endif /* _ASM_RISCV_PGTABLE_64_H */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 67f687aee673e33dc17130b923a699c8c5567c82..7e949f25c93347728a6f87761d4f1396a483c7b7 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -24,6 +24,17 @@ #define KERNEL_LINK_ADDR PAGE_OFFSET #endif +/* Number of entries in the page global directory */ +#define 
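/*
 * Editor's note, not part of the patch: with pgtable_l4_enabled the PUD
 * level defined above is a real table under sv48 and a pass-through
 * under sv39, yet generic page-table walkers need no change because the
 * helpers hide the choice. Sketch of a walk down to the PMD for an
 * address known to be mapped:
 */
static pmd_t *demo_walk_to_pmd(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	p4d_t *p4d = p4d_offset(pgd, addr);	/* always folded on riscv */
	pud_t *pud = pud_offset(p4d, addr);	/* real table only on sv48 */

	return pmd_offset(pud, addr);
}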
PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t)) +/* Number of entries in the page table */ +#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t)) + +/* + * Half of the kernel address space (half of the entries of the page global + * directory) is for the direct mapping. + */ +#define KERN_VIRT_SIZE ((PTRS_PER_PGD / 2 * PGDIR_SIZE) / 2) + #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) #define VMALLOC_END PAGE_OFFSET #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) @@ -39,8 +50,10 @@ /* Modules always live before the kernel */ #ifdef CONFIG_64BIT -#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G) -#define MODULES_END (PFN_ALIGN((unsigned long)&_start)) +/* This is used to define the end of the KASAN shadow region */ +#define MODULES_LOWEST_VADDR (KERNEL_LINK_ADDR - SZ_2G) +#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G) +#define MODULES_END (PFN_ALIGN((unsigned long)&_start)) #endif /* @@ -48,8 +61,14 @@ * struct pages to map half the virtual address space. Then * position vmemmap directly below the VMALLOC region. */ +#ifdef CONFIG_64BIT +#define VA_BITS (pgtable_l4_enabled ? 48 : 39) +#else +#define VA_BITS 32 +#endif + #define VMEMMAP_SHIFT \ - (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) + (VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) #define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) #define VMEMMAP_END VMALLOC_START #define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) @@ -83,8 +102,7 @@ #ifndef __ASSEMBLY__ -/* Page Upper Directory not used in RISC-V */ -#include +#include #include #include #include @@ -107,12 +125,20 @@ #define XIP_FIXUP(addr) (addr) #endif /* CONFIG_XIP_KERNEL */ -#ifdef CONFIG_MMU -/* Number of entries in the page global directory */ -#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t)) -/* Number of entries in the page table */ -#define PTRS_PER_PTE (PAGE_SIZE / sizeof(pte_t)) +struct pt_alloc_ops { + pte_t *(*get_pte_virt)(phys_addr_t pa); + phys_addr_t (*alloc_pte)(uintptr_t va); +#ifndef __PAGETABLE_PMD_FOLDED + pmd_t *(*get_pmd_virt)(phys_addr_t pa); + phys_addr_t (*alloc_pmd)(uintptr_t va); + pud_t *(*get_pud_virt)(phys_addr_t pa); + phys_addr_t (*alloc_pud)(uintptr_t va); +#endif +}; + +extern struct pt_alloc_ops pt_ops __initdata; +#ifdef CONFIG_MMU /* Number of PGD entries that a user-mode program can use */ #define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE) @@ -659,7 +685,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, * and give the kernel the other (upper) half. */ #ifdef CONFIG_64BIT -#define KERN_VIRT_START (-(BIT(CONFIG_VA_BITS)) + TASK_SIZE) +#define KERN_VIRT_START (-(BIT(VA_BITS)) + TASK_SIZE) #else #define KERN_VIRT_START FIXADDR_START #endif @@ -667,11 +693,22 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, /* * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32. * Note that PGDIR_SIZE must evenly divide TASK_SIZE. + * Task size is: + * - 0x9fc00000 (~2.5GB) for RV32. + * - 0x4000000000 ( 256GB) for RV64 using SV39 mmu + * - 0x800000000000 ( 128TB) for RV64 using SV48 mmu + * + * Note that PGDIR_SIZE must evenly divide TASK_SIZE since "RISC-V + * Instruction Set Manual Volume II: Privileged Architecture" states that + * "load and store effective addresses, which are 64bits, must have bits + * 63–48 all equal to bit 47, or else a page-fault exception will occur." 
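 *
 * Editor's note, not part of the patch: checking the arithmetic above.
 * Under sv48, PGDIR_SHIFT is 39, so
 *	TASK_SIZE = PGDIR_SIZE * PTRS_PER_PGD / 2
 *	          = (1UL << 39) * 512 / 2 = 1UL << 47 = 0x800000000000,
 * i.e. 128TB, the lower half of the 48-bit space, which keeps bits 63-48
 * of user effective addresses equal to bit 47 as the quoted manual text
 * requires. Under sv39 the same formula gives
 * (1UL << 30) * 512 / 2 = 0x4000000000, i.e. 256GB.
 *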
*/ #ifdef CONFIG_64BIT -#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2) +#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2) +#define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2) #else -#define TASK_SIZE FIXADDR_START +#define TASK_SIZE FIXADDR_START +#define TASK_SIZE_MIN TASK_SIZE #endif #else /* CONFIG_MMU */ @@ -697,6 +734,8 @@ extern uintptr_t _dtb_early_pa; #define dtb_early_va _dtb_early_va #define dtb_early_pa _dtb_early_pa #endif /* CONFIG_XIP_KERNEL */ +extern u64 satp_mode; +extern bool pgtable_l4_enabled; void paging_init(void); void misc_mem_init(void); diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 26ba6f2d7a40de9eaf191691ee8f1d0378505077..d1c37479d828c1754178f0d5252b02ed164060ad 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -8,6 +8,7 @@ #define _ASM_RISCV_SBI_H #include +#include #ifdef CONFIG_RISCV_SBI enum sbi_ext_id { @@ -128,27 +129,27 @@ long sbi_get_mimpid(void); void sbi_set_timer(uint64_t stime_value); void sbi_shutdown(void); void sbi_clear_ipi(void); -int sbi_send_ipi(const unsigned long *hart_mask); -int sbi_remote_fence_i(const unsigned long *hart_mask); -int sbi_remote_sfence_vma(const unsigned long *hart_mask, +int sbi_send_ipi(const struct cpumask *cpu_mask); +int sbi_remote_fence_i(const struct cpumask *cpu_mask); +int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, unsigned long start, unsigned long size); -int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask, +int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long asid); -int sbi_remote_hfence_gvma(const unsigned long *hart_mask, +int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask, unsigned long start, unsigned long size); -int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask, +int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long vmid); -int sbi_remote_hfence_vvma(const unsigned long *hart_mask, +int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask, unsigned long start, unsigned long size); -int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask, +int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long asid); @@ -183,7 +184,7 @@ static inline unsigned long sbi_mk_version(unsigned long major, int sbi_err_map_linux_errno(int err); #else /* CONFIG_RISCV_SBI */ -static inline int sbi_remote_fence_i(const unsigned long *hart_mask) { return -1; } +static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; } static inline void sbi_init(void) {} #endif /* CONFIG_RISCV_SBI */ #endif /* _ASM_RISCV_SBI_H */ diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index 6ad749f42807fe187d72c1c3b6d43c4bc1e749fe..23170c933d733d866d976ecfc9baf72bbd2b827c 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -92,8 +92,6 @@ static inline void riscv_clear_ipi(void) #endif /* CONFIG_SMP */ -void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out); - #if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP) bool cpu_has_hotplug(unsigned int cpu); #else diff --git a/arch/riscv/include/asm/sparsemem.h b/arch/riscv/include/asm/sparsemem.h index 45a7018a8118b911bedddbc39b984031657300ca..63acaecc3374785a72f646359b2f6f3e4821ac7f 100644 --- a/arch/riscv/include/asm/sparsemem.h +++ b/arch/riscv/include/asm/sparsemem.h @@ -4,7 +4,11 @@ #define 
_ASM_RISCV_SPARSEMEM_H #ifdef CONFIG_SPARSEMEM -#define MAX_PHYSMEM_BITS CONFIG_PA_BITS +#ifdef CONFIG_64BIT +#define MAX_PHYSMEM_BITS 56 +#else +#define MAX_PHYSMEM_BITS 34 +#endif /* CONFIG_64BIT */ #define SECTION_SIZE_BITS 27 #endif /* CONFIG_SPARSEMEM */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 3397ddac1a30caeab19cc7ee7fc99ff58a52fd7a..612556faa527f583cf6fc3bc9a737adcaa18a105 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -43,7 +43,8 @@ obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SMP) += cpu_ops.o -obj-$(CONFIG_SMP) += cpu_ops_spinwait.o + +obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index 253126e4beef68d37bfbdd53d7f7c47fe2df52c9..df0519a64eaf85a44a3e327fbf813d797948c36d 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -12,6 +12,7 @@ #include #include #include +#include void asm_offsets(void); @@ -468,4 +469,6 @@ void asm_offsets(void) DEFINE(PT_SIZE_ON_STACK, ALIGN(sizeof(struct pt_regs), STACK_ALIGN)); OFFSET(KERNEL_MAP_VIRT_ADDR, kernel_mapping, virt_addr); + OFFSET(SBI_HART_BOOT_TASK_PTR_OFFSET, sbi_hart_boot_data, task_ptr); + OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr); } diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c index be7f05b542bbdb5902c2324fd564402791fcb755..f7a832e3a1d1d44f0568a5f7336eb587092d85a0 100644 --- a/arch/riscv/kernel/cpu-hotplug.c +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -12,6 +12,7 @@ #include #include #include +#include #include bool cpu_has_hotplug(unsigned int cpu) @@ -40,6 +41,7 @@ int __cpu_disable(void) return ret; remove_cpu_topology(cpu); + numa_remove_cpu(cpu); set_cpu_online(cpu, false); irq_migrate_all_off_this_cpu(); diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index f13b2c9ea912d801f54d1819c1136facba71d372..ad0a7e9f828bef4806febe956ba3ef3875486722 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -7,6 +7,7 @@ #include #include #include +#include /* * Returns the hart ID of the given device tree node, or -ENODEV if the node @@ -71,18 +72,19 @@ static void print_isa(struct seq_file *f, const char *isa) seq_puts(f, "\n"); } -static void print_mmu(struct seq_file *f, const char *mmu_type) +static void print_mmu(struct seq_file *f) { + char sv_type[16]; + #if defined(CONFIG_32BIT) - if (strcmp(mmu_type, "riscv,sv32") != 0) - return; + strncpy(sv_type, "sv32", 5); #elif defined(CONFIG_64BIT) - if (strcmp(mmu_type, "riscv,sv39") != 0 && - strcmp(mmu_type, "riscv,sv48") != 0) - return; + if (pgtable_l4_enabled) + strncpy(sv_type, "sv48", 5); + else + strncpy(sv_type, "sv39", 5); #endif - - seq_printf(f, "mmu\t\t: %s\n", mmu_type+6); + seq_printf(f, "mmu\t\t: %s\n", sv_type); } static void *c_start(struct seq_file *m, loff_t *pos) @@ -107,14 +109,13 @@ static int c_show(struct seq_file *m, void *v) { unsigned long cpu_id = (unsigned long)v - 1; struct device_node *node = of_get_cpu_node(cpu_id, NULL); - const char *compat, *isa, *mmu; + const char *compat, *isa; seq_printf(m, "processor\t: %lu\n", cpu_id); seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id)); if (!of_property_read_string(node, "riscv,isa", &isa)) print_isa(m, isa); - if (!of_property_read_string(node, "mmu-type", &mmu)) - print_mmu(m, mmu); + print_mmu(m); if 
(!of_property_read_string(node, "compatible", &compat) && strcmp(compat, "riscv")) seq_printf(m, "uarch\t\t: %s\n", compat); diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c index 1985884fe8290bb290bbf8eaff4609b3e7398349..170d07e577215b2c40c23da0332b2a2c28a322a0 100644 --- a/arch/riscv/kernel/cpu_ops.c +++ b/arch/riscv/kernel/cpu_ops.c @@ -8,37 +8,29 @@ #include #include #include -#include #include #include #include const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; -void *__cpu_up_stack_pointer[NR_CPUS] __section(".data"); -void *__cpu_up_task_pointer[NR_CPUS] __section(".data"); - extern const struct cpu_operations cpu_ops_sbi; +#ifdef CONFIG_RISCV_BOOT_SPINWAIT extern const struct cpu_operations cpu_ops_spinwait; - -void cpu_update_secondary_bootdata(unsigned int cpuid, - struct task_struct *tidle) -{ - int hartid = cpuid_to_hartid_map(cpuid); - - /* Make sure tidle is updated */ - smp_mb(); - WRITE_ONCE(__cpu_up_stack_pointer[hartid], - task_stack_page(tidle) + THREAD_SIZE); - WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle); -} +#else +const struct cpu_operations cpu_ops_spinwait = { + .name = "", + .cpu_prepare = NULL, + .cpu_start = NULL, +}; +#endif void __init cpu_set_ops(int cpuid) { #if IS_ENABLED(CONFIG_RISCV_SBI) if (sbi_probe_extension(SBI_EXT_HSM) > 0) { if (!cpuid) - pr_info("SBI v0.2 HSM extension detected\n"); + pr_info("SBI HSM extension detected\n"); cpu_ops[cpuid] = &cpu_ops_sbi; } else #endif diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c index 685fae72b7f5c758d83e1ebe4fdf40eff7235848..dae29cbfe550b1df114333f47517dc1ce523bddf 100644 --- a/arch/riscv/kernel/cpu_ops_sbi.c +++ b/arch/riscv/kernel/cpu_ops_sbi.c @@ -7,13 +7,22 @@ #include #include +#include #include +#include #include #include extern char secondary_start_sbi[]; const struct cpu_operations cpu_ops_sbi; +/* + * Ordered booting via HSM brings up one cpu at a time. However, cpu hotplug can + * be invoked from multiple threads in parallel. Define per-cpu data + * to handle that.
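+ * + * For example, if two cpus are hotplugged in parallel, each target hart is handed the physical address of its own boot_data entry (in a1, per the HSM start protocol), so the stack/task pointer handoffs cannot overwrite one another.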
+ */ +DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data); + static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr, unsigned long priv) { @@ -55,14 +64,19 @@ static int sbi_hsm_hart_get_status(unsigned long hartid) static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle) { - int rc; unsigned long boot_addr = __pa_symbol(secondary_start_sbi); int hartid = cpuid_to_hartid_map(cpuid); - - cpu_update_secondary_bootdata(cpuid, tidle); - rc = sbi_hsm_hart_start(hartid, boot_addr, 0); - - return rc; + unsigned long hsm_data; + struct sbi_hart_boot_data *bdata = &per_cpu(boot_data, cpuid); + + /* Make sure tidle is updated */ + smp_mb(); + bdata->task_ptr = tidle; + bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE; + /* Make sure boot data is updated */ + smp_mb(); + hsm_data = __pa(bdata); + return sbi_hsm_hart_start(hartid, boot_addr, hsm_data); } static int sbi_cpu_prepare(unsigned int cpuid) diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c index b2c957bb68c1fd87d35a03dc759806569109a032..346847f6c41c854ee719f123ecb087803b488fe9 100644 --- a/arch/riscv/kernel/cpu_ops_spinwait.c +++ b/arch/riscv/kernel/cpu_ops_spinwait.c @@ -6,11 +6,36 @@ #include #include #include +#include #include #include #include const struct cpu_operations cpu_ops_spinwait; +void *__cpu_spinwait_stack_pointer[NR_CPUS] __section(".data"); +void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data"); + +static void cpu_update_secondary_bootdata(unsigned int cpuid, + struct task_struct *tidle) +{ + int hartid = cpuid_to_hartid_map(cpuid); + + /* + * The hartid must be less than NR_CPUS to avoid out-of-bounds accesses + * to __cpu_spinwait_stack/task_pointer. That is not always possible + * on platforms with a discontiguous hartid numbering scheme. That's why + * spinwait booting is not the recommended approach for platforms + * booting Linux in S-mode, and it may be disabled in the future. + */ + if (hartid == INVALID_HARTID || hartid >= NR_CPUS) + return; + + /* Make sure tidle is updated */ + smp_mb(); + WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], + task_stack_page(tidle) + THREAD_SIZE); + WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle); +} static int spinwait_cpu_prepare(unsigned int cpuid) { @@ -28,7 +53,7 @@ static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle) * selects the first cpu to boot the kernel and causes the remainder * of the cpus to spin in a loop waiting for their stack pointer to be * setup by that main cpu. Writing to bootdata - * (i.e __cpu_up_stack_pointer) signals to the spinning cpus that they + * (i.e. __cpu_spinwait_stack_pointer) signals to the spinning cpus that they * can continue the boot process.
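+ * + * A minimal C sketch of what each waiting cpu effectively executes (the real loop is the assembly in head.S, indexed by hartid): + * + * while (!READ_ONCE(__cpu_spinwait_stack_pointer[hartid])) + * ;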
*/ cpu_update_secondary_bootdata(cpuid, tidle); diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 604d60292dd8365433d0f6c3ca2dab3c5c7de348..ec07f991866a55d9ccad2643f045eed78ecc8f89 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include "efi-header.S" @@ -21,14 +22,13 @@ add \reg, \reg, t0 .endm .macro XIP_FIXUP_FLASH_OFFSET reg - la t1, __data_loc - li t0, XIP_OFFSET_MASK - and t1, t1, t0 - li t1, XIP_OFFSET - sub t0, t0, t1 - sub \reg, \reg, t0 + la t0, __data_loc + REG_L t1, _xip_phys_offset + sub \reg, \reg, t1 + add \reg, \reg, t0 .endm _xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET +_xip_phys_offset: .dword CONFIG_XIP_PHYS_ADDR + XIP_OFFSET #else .macro XIP_FIXUP_OFFSET reg .endm @@ -105,7 +105,8 @@ relocate: /* Compute satp for kernel page tables, but don't load it yet */ srl a2, a0, PAGE_SHIFT - li a1, SATP_MODE + la a1, satp_mode + REG_L a1, 0(a1) or a2, a2, a1 /* @@ -167,15 +168,15 @@ secondary_start_sbi: la a3, .Lsecondary_park csrw CSR_TVEC, a3 - slli a3, a0, LGREG - la a4, __cpu_up_stack_pointer - XIP_FIXUP_OFFSET a4 - la a5, __cpu_up_task_pointer - XIP_FIXUP_OFFSET a5 - add a4, a3, a4 - add a5, a3, a5 - REG_L sp, (a4) - REG_L tp, (a5) + /* a0 contains the hartid & a1 contains boot data */ + li a2, SBI_HART_BOOT_TASK_PTR_OFFSET + XIP_FIXUP_OFFSET a2 + add a2, a2, a1 + REG_L tp, (a2) + li a3, SBI_HART_BOOT_STACK_PTR_OFFSET + XIP_FIXUP_OFFSET a3 + add a3, a3, a1 + REG_L sp, (a3) .Lsecondary_start_common: @@ -257,13 +258,13 @@ pmp_done: li t0, SR_FS csrc CSR_STATUS, t0 -#ifdef CONFIG_SMP +#ifdef CONFIG_RISCV_BOOT_SPINWAIT li t0, CONFIG_NR_CPUS blt a0, t0, .Lgood_cores tail .Lsecondary_park .Lgood_cores: -#endif + /* The lottery system is only required for spinwait booting method */ #ifndef CONFIG_XIP_KERNEL /* Pick one hart to run the main boot sequence */ la a3, hart_lottery @@ -282,6 +283,10 @@ pmp_done: /* first time here if hart_lottery in RAM is not set */ beq t0, t1, .Lsecondary_start +#endif /* CONFIG_XIP_KERNEL */ +#endif /* CONFIG_RISCV_BOOT_SPINWAIT */ + +#ifdef CONFIG_XIP_KERNEL la sp, _end + THREAD_SIZE XIP_FIXUP_OFFSET sp mv s0, a0 @@ -338,16 +343,16 @@ clear_bss_done: call soc_early_init tail start_kernel +#ifdef CONFIG_RISCV_BOOT_SPINWAIT .Lsecondary_start: -#ifdef CONFIG_SMP /* Set trap vector to spin forever to help debug */ la a3, .Lsecondary_park csrw CSR_TVEC, a3 slli a3, a0, LGREG - la a1, __cpu_up_stack_pointer + la a1, __cpu_spinwait_stack_pointer XIP_FIXUP_OFFSET a1 - la a2, __cpu_up_task_pointer + la a2, __cpu_spinwait_task_pointer XIP_FIXUP_OFFSET a2 add a1, a3, a1 add a2, a3, a2 @@ -365,7 +370,7 @@ clear_bss_done: fence tail .Lsecondary_start_common -#endif +#endif /* CONFIG_RISCV_BOOT_SPINWAIT */ END(_start_kernel) diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h index aabbc3ac3e48e9a699514f0f33f2d652f4d6bea4..726731ada5349c3e5cc58765ce5d94dae0edfb58 100644 --- a/arch/riscv/kernel/head.h +++ b/arch/riscv/kernel/head.h @@ -16,7 +16,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa); asmlinkage void __init __copy_data(void); #endif -extern void *__cpu_up_stack_pointer[]; -extern void *__cpu_up_task_pointer[]; +#ifdef CONFIG_RISCV_BOOT_SPINWAIT +extern void *__cpu_spinwait_stack_pointer[]; +extern void *__cpu_spinwait_task_pointer[]; +#endif #endif /* __ASM_HEAD_H */ diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index
9c0511119bad9bada528a858ea5e82dc1453b060..a892437301538653778c77785a72cb3ffb66b823 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -42,12 +42,10 @@ static int riscv_gpr_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - int ret; struct pt_regs *regs; regs = task_pt_regs(target); - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1); - return ret; + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1); } #ifdef CONFIG_FPU diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index 9a84f0cb5175115694ba0e8e33fe2bb7b217aaeb..f72527fcb34709a69b83f667f2cd7c428d820262 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -16,8 +16,8 @@ unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT; EXPORT_SYMBOL(sbi_spec_version); static void (*__sbi_set_timer)(uint64_t stime) __ro_after_init; -static int (*__sbi_send_ipi)(const unsigned long *hart_mask) __ro_after_init; -static int (*__sbi_rfence)(int fid, const unsigned long *hart_mask, +static int (*__sbi_send_ipi)(const struct cpumask *cpu_mask) __ro_after_init; +static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) __ro_after_init; @@ -67,6 +67,30 @@ int sbi_err_map_linux_errno(int err) EXPORT_SYMBOL(sbi_err_map_linux_errno); #ifdef CONFIG_RISCV_SBI_V01 +static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mask) +{ + unsigned long cpuid, hartid; + unsigned long hmask = 0; + + /* + * There is no maximum hartid concept in RISC-V and NR_CPUS must not be + * associated with hartid. As SBI v0.1 is only kept for backward compatibility + * and will be removed in the future, there is no point in supporting hartid + * greater than BITS_PER_LONG (32 for RV32 and 64 for RV64). Ideally, SBI v0.2 + * should be used for platforms with hartid greater than BITS_PER_LONG. + */ + for_each_cpu(cpuid, cpu_mask) { + hartid = cpuid_to_hartid_map(cpuid); + if (hartid >= BITS_PER_LONG) { + pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n"); + break; + } + hmask |= 1UL << hartid; + } + + return hmask; +} + /** * sbi_console_putchar() - Writes given character to the console device. * @ch: The data to be written to the console.
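/* * Illustrative examples, not part of the patch; the hartid values are assumed for the sake of the example. The v0.1 path above collapses cpuids {0, 2} with hartids {1, 3} into the single bitmap (1UL << 1) | (1UL << 3) == 0xa, the only layout SBI v0.1 understands. The v0.2 paths further below instead batch hartids {1, 2, 70} as (hbase = 1, hmask = 0x3), flush that window once (hbase + BITS_PER_LONG) <= 70 on RV64, and issue a second ecall with (hbase = 70, hmask = 0x1). */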
@@ -132,33 +156,44 @@ static void __sbi_set_timer_v01(uint64_t stime_value) #endif } -static int __sbi_send_ipi_v01(const unsigned long *hart_mask) +static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask) { - sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)hart_mask, + unsigned long hart_mask; + + if (!cpu_mask) + cpu_mask = cpu_online_mask; + hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); + + sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)(&hart_mask), 0, 0, 0, 0, 0); return 0; } -static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask, +static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) { int result = 0; + unsigned long hart_mask; + + if (!cpu_mask) + cpu_mask = cpu_online_mask; + hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); /* v0.2 function IDs are equivalent to v0.1 extension IDs */ switch (fid) { case SBI_EXT_RFENCE_REMOTE_FENCE_I: sbi_ecall(SBI_EXT_0_1_REMOTE_FENCE_I, 0, - (unsigned long)hart_mask, 0, 0, 0, 0, 0); + (unsigned long)&hart_mask, 0, 0, 0, 0, 0); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA, 0, - (unsigned long)hart_mask, start, size, + (unsigned long)&hart_mask, start, size, 0, 0, 0); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID, 0, - (unsigned long)hart_mask, start, size, + (unsigned long)&hart_mask, start, size, arg4, 0, 0); break; default: @@ -180,7 +215,7 @@ static void __sbi_set_timer_v01(uint64_t stime_value) sbi_major_version(), sbi_minor_version()); } -static int __sbi_send_ipi_v01(const unsigned long *hart_mask) +static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask) { pr_warn("IPI extension is not available in SBI v%lu.%lu\n", sbi_major_version(), sbi_minor_version()); @@ -188,7 +223,7 @@ static int __sbi_send_ipi_v01(const unsigned long *hart_mask) return 0; } -static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask, +static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) { @@ -212,37 +247,33 @@ static void __sbi_set_timer_v02(uint64_t stime_value) #endif } -static int __sbi_send_ipi_v02(const unsigned long *hart_mask) +static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask) { - unsigned long hartid, hmask_val, hbase; - struct cpumask tmask; + unsigned long hartid, cpuid, hmask = 0, hbase = 0; struct sbiret ret = {0}; int result; - if (!hart_mask || !(*hart_mask)) { - riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask); - hart_mask = cpumask_bits(&tmask); - } + if (!cpu_mask) + cpu_mask = cpu_online_mask; - hmask_val = 0; - hbase = 0; - for_each_set_bit(hartid, hart_mask, NR_CPUS) { - if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) { + for_each_cpu(cpuid, cpu_mask) { + hartid = cpuid_to_hartid_map(cpuid); + if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) { ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, - hmask_val, hbase, 0, 0, 0, 0); + hmask, hbase, 0, 0, 0, 0); if (ret.error) goto ecall_failed; - hmask_val = 0; + hmask = 0; hbase = 0; } - if (!hmask_val) + if (!hmask) hbase = hartid; - hmask_val |= 1UL << (hartid - hbase); + hmask |= 1UL << (hartid - hbase); } - if (hmask_val) { + if (hmask) { ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, - hmask_val, hbase, 0, 0, 0, 0); + hmask, hbase, 0, 0, 0, 0); if (ret.error) goto ecall_failed; } @@ -252,11 +283,11 @@ static int __sbi_send_ipi_v02(const unsigned 
long *hart_mask) ecall_failed: result = sbi_err_map_linux_errno(ret.error); pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n", - __func__, hbase, hmask_val, result); + __func__, hbase, hmask, result); return result; } -static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val, +static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask, unsigned long hbase, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) @@ -267,31 +298,31 @@ static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val, switch (fid) { case SBI_EXT_RFENCE_REMOTE_FENCE_I: - ret = sbi_ecall(ext, fid, hmask_val, hbase, 0, 0, 0, 0); + ret = sbi_ecall(ext, fid, hmask, hbase, 0, 0, 0, 0); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + ret = sbi_ecall(ext, fid, hmask, hbase, start, size, 0, 0); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + ret = sbi_ecall(ext, fid, hmask, hbase, start, size, arg4, 0); break; case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA: - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + ret = sbi_ecall(ext, fid, hmask, hbase, start, size, 0, 0); break; case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID: - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + ret = sbi_ecall(ext, fid, hmask, hbase, start, size, arg4, 0); break; case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA: - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + ret = sbi_ecall(ext, fid, hmask, hbase, start, size, 0, 0); break; case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID: - ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + ret = sbi_ecall(ext, fid, hmask, hbase, start, size, arg4, 0); break; default: @@ -303,43 +334,39 @@ static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val, if (ret.error) { result = sbi_err_map_linux_errno(ret.error); pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n", - __func__, hbase, hmask_val, result); + __func__, hbase, hmask, result); } return result; } -static int __sbi_rfence_v02(int fid, const unsigned long *hart_mask, +static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long arg4, unsigned long arg5) { - unsigned long hmask_val, hartid, hbase; - struct cpumask tmask; + unsigned long hartid, cpuid, hmask = 0, hbase = 0; int result; - if (!hart_mask || !(*hart_mask)) { - riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask); - hart_mask = cpumask_bits(&tmask); - } + if (!cpu_mask) + cpu_mask = cpu_online_mask; - hmask_val = 0; - hbase = 0; - for_each_set_bit(hartid, hart_mask, NR_CPUS) { - if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) { - result = __sbi_rfence_v02_call(fid, hmask_val, hbase, + for_each_cpu(cpuid, cpu_mask) { + hartid = cpuid_to_hartid_map(cpuid); + if (hmask && ((hbase + BITS_PER_LONG) <= hartid)) { + result = __sbi_rfence_v02_call(fid, hmask, hbase, start, size, arg4, arg5); if (result) return result; - hmask_val = 0; + hmask = 0; hbase = 0; } - if (!hmask_val) + if (!hmask) hbase = hartid; - hmask_val |= 1UL << (hartid - hbase); + hmask |= 1UL << (hartid - hbase); } - if (hmask_val) { - result = __sbi_rfence_v02_call(fid, hmask_val, hbase, + if (hmask) { + result = __sbi_rfence_v02_call(fid, hmask, hbase, start, size, arg4, arg5); if (result) return result; @@ -361,44 +388,44 @@ void sbi_set_timer(uint64_t stime_value) /** * sbi_send_ipi() - Send an IPI to any hart. 
- * @hart_mask: A cpu mask containing all the target harts. + * @cpu_mask: A cpu mask containing all the target harts. * * Return: 0 on success, appropriate linux error code otherwise. */ -int sbi_send_ipi(const unsigned long *hart_mask) +int sbi_send_ipi(const struct cpumask *cpu_mask) { - return __sbi_send_ipi(hart_mask); + return __sbi_send_ipi(cpu_mask); } EXPORT_SYMBOL(sbi_send_ipi); /** * sbi_remote_fence_i() - Execute FENCE.I instruction on given remote harts. - * @hart_mask: A cpu mask containing all the target harts. + * @cpu_mask: A cpu mask containing all the target harts. * * Return: 0 on success, appropriate linux error code otherwise. */ -int sbi_remote_fence_i(const unsigned long *hart_mask) +int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I, - hart_mask, 0, 0, 0, 0); + cpu_mask, 0, 0, 0, 0); } EXPORT_SYMBOL(sbi_remote_fence_i); /** * sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote * harts for the specified virtual address range. - * @hart_mask: A cpu mask containing all the target harts. + * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the virtual address * @size: Total size of the virtual address range. * * Return: 0 on success, appropriate linux error code otherwise. */ -int sbi_remote_sfence_vma(const unsigned long *hart_mask, +int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, unsigned long start, unsigned long size) { return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, - hart_mask, start, size, 0, 0); + cpu_mask, start, size, 0, 0); } EXPORT_SYMBOL(sbi_remote_sfence_vma); @@ -406,38 +433,38 @@ EXPORT_SYMBOL(sbi_remote_sfence_vma); * sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given * remote harts for a virtual address range belonging to a specific ASID. * - * @hart_mask: A cpu mask containing all the target harts. + * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the virtual address * @size: Total size of the virtual address range. * @asid: The value of address space identifier (ASID). * * Return: 0 on success, appropriate linux error code otherwise. */ -int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask, +int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long asid) { return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, - hart_mask, start, size, asid, 0); + cpu_mask, start, size, asid, 0); } EXPORT_SYMBOL(sbi_remote_sfence_vma_asid); /** * sbi_remote_hfence_gvma() - Execute HFENCE.GVMA instructions on given remote * harts for the specified guest physical address range. - * @hart_mask: A cpu mask containing all the target harts. + * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the guest physical address * @size: Total size of the guest physical address range. * * Return: None */ -int sbi_remote_hfence_gvma(const unsigned long *hart_mask, +int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask, unsigned long start, unsigned long size) { return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA, - hart_mask, start, size, 0, 0); + cpu_mask, start, size, 0, 0); } EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma); @@ -445,38 +472,38 @@ EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma); * sbi_remote_hfence_gvma_vmid() - Execute HFENCE.GVMA instructions on given * remote harts for a guest physical address range belonging to a specific VMID. * - * @hart_mask: A cpu mask containing all the target harts. 
+ * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the guest physical address * @size: Total size of the guest physical address range. * @vmid: The value of guest ID (VMID). * * Return: 0 if success, Error otherwise. */ -int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask, +int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long vmid) { return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID, - hart_mask, start, size, vmid, 0); + cpu_mask, start, size, vmid, 0); } EXPORT_SYMBOL(sbi_remote_hfence_gvma_vmid); /** * sbi_remote_hfence_vvma() - Execute HFENCE.VVMA instructions on given remote * harts for the current guest virtual address range. - * @hart_mask: A cpu mask containing all the target harts. + * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the current guest virtual address * @size: Total size of the current guest virtual address range. * * Return: None */ -int sbi_remote_hfence_vvma(const unsigned long *hart_mask, +int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask, unsigned long start, unsigned long size) { return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA, - hart_mask, start, size, 0, 0); + cpu_mask, start, size, 0, 0); } EXPORT_SYMBOL(sbi_remote_hfence_vvma); @@ -485,20 +512,20 @@ EXPORT_SYMBOL(sbi_remote_hfence_vvma); * remote harts for current guest virtual address range belonging to a specific * ASID. * - * @hart_mask: A cpu mask containing all the target harts. + * @cpu_mask: A cpu mask containing all the target harts. * @start: Start of the current guest virtual address * @size: Total size of the current guest virtual address range. * @asid: The value of address space identifier (ASID). * * Return: None */ -int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask, +int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask, unsigned long start, unsigned long size, unsigned long asid) { return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID, - hart_mask, start, size, asid, 0); + cpu_mask, start, size, asid, 0); } EXPORT_SYMBOL(sbi_remote_hfence_vvma_asid); @@ -591,11 +618,7 @@ long sbi_get_mimpid(void) static void sbi_send_cpumask_ipi(const struct cpumask *target) { - struct cpumask hartid_mask; - - riscv_cpuid_to_hartid_mask(target, &hartid_mask); - - sbi_send_ipi(cpumask_bits(&hartid_mask)); + sbi_send_ipi(target); } static const struct riscv_ipi_ops sbi_ipi_ops = { diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 63241abe84eb8036213ec5f851567465f5d11dc4..b42bfdc674823cec93ab3342235f12c5b867b611 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -59,16 +59,6 @@ atomic_t hart_lottery __section(".sdata") unsigned long boot_cpu_hartid; static DEFINE_PER_CPU(struct cpu, cpu_devices); -void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out) -{ - int cpu; - - cpumask_clear(out); - for_each_cpu(cpu, in) - cpumask_set_cpu(cpuid_to_hartid_map(cpu), out); -} -EXPORT_SYMBOL_GPL(riscv_cpuid_to_hartid_mask); - /* * Place kernel memory regions on the resource tree so that * kexec-tools can retrieve them from /proc/iomem. 
While there diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index bd82375db51a62cf1b84414a2a80d9738ca1d36c..622f226454d53d81cbc5cda9fd735fb6ed41d588 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -96,7 +96,7 @@ void __init setup_smp(void) if (cpuid >= NR_CPUS) { pr_warn("Invalid cpuid [%d] for hartid [%d]\n", cpuid, hart); - break; + continue; } cpuid_to_hartid_map(cpuid) = hart; diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 201ee206fb57d718e4a184fb93d1de53d90a8f4e..14d2b53ec322d86005d2b94ffa92c21e179d29df 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -22,15 +22,16 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { unsigned long fp, sp, pc; + int level = 0; if (regs) { fp = frame_pointer(regs); sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - fp = (unsigned long)__builtin_frame_address(1); - sp = (unsigned long)__builtin_frame_address(0); - pc = (unsigned long)__builtin_return_address(0); + fp = (unsigned long)__builtin_frame_address(0); + sp = sp_in_global; + pc = (unsigned long)walk_stackframe; } else { /* task blocked in __switch_to */ fp = task->thread.s[0]; @@ -42,7 +43,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, unsigned long low, high; struct stackframe *frame; - if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) + if (unlikely(!__kernel_text_address(pc) || (level++ >= 1 && !fn(arg, pc)))) break; /* Validate frame pointer */ diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index 9af67dbdc66af7ce351df1146987cf0e1187216b..f80a34fbf10276fef888f5bcdedbff64cbeca5fa 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -114,7 +114,6 @@ static bool stage2_get_leaf_entry(struct kvm *kvm, gpa_t addr, static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr) { - struct cpumask hmask; unsigned long size = PAGE_SIZE; struct kvm_vmid *vmid = &kvm->arch.vmid; @@ -127,8 +126,7 @@ static void stage2_remote_tlb_flush(struct kvm *kvm, u32 level, gpa_t addr) * where the Guest/VM is running. */ preempt_disable(); - riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask); - sbi_remote_hfence_gvma_vmid(cpumask_bits(&hmask), addr, size, + sbi_remote_hfence_gvma_vmid(cpu_online_mask, addr, size, READ_ONCE(vmid->vmid)); preempt_enable(); } diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 0c5239e05721541785ba7b52de3c02b4fe80a99e..624166004e36c637fb5fbd8966f3412f72a84294 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -90,6 +90,7 @@ int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *cntx; + struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; /* Mark this VCPU never ran */ vcpu->arch.ran_atleast_once = false; @@ -106,6 +107,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) cntx->hstatus |= HSTATUS_SPVP; cntx->hstatus |= HSTATUS_SPV; + /* By default, make CY, TM, and IR counters accessible in VU mode */ + reset_csr->scounteren = 0x7; + /* Setup VCPU timer */ kvm_riscv_vcpu_timer_init(vcpu); @@ -699,6 +703,20 @@ static void kvm_riscv_update_hvip(struct kvm_vcpu *vcpu) csr_write(CSR_HVIP, csr->hvip); } +/* + * Actually run the vCPU, entering an RCU extended quiescent state (EQS) while + * the vCPU is running. 
+ * + * This must be noinstr as instrumentation may make use of RCU, and this is not + * safe during the EQS. + */ +static void noinstr kvm_riscv_vcpu_enter_exit(struct kvm_vcpu *vcpu) +{ + guest_state_enter_irqoff(); + __kvm_riscv_switch_to(&vcpu->arch); + guest_state_exit_irqoff(); +} + int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { int ret; @@ -790,9 +808,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) continue; } - guest_enter_irqoff(); + guest_timing_enter_irqoff(); - __kvm_riscv_switch_to(&vcpu->arch); + kvm_riscv_vcpu_enter_exit(vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; vcpu->stat.exits++; @@ -812,25 +830,21 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) kvm_riscv_vcpu_sync_interrupts(vcpu); /* - * We may have taken a host interrupt in VS/VU-mode (i.e. - * while executing the guest). This interrupt is still - * pending, as we haven't serviced it yet! + * We must ensure that any pending interrupts are taken before + * we exit guest timing so that timer ticks are accounted as + * guest time. Transiently unmask interrupts so that any + * pending interrupts are taken. * - * We're now back in HS-mode with interrupts disabled - * so enabling the interrupts now will have the effect - * of taking the interrupt again, in HS-mode this time. + * There's no barrier which ensures that pending interrupts are + * recognised, so we just hope that the CPU takes any pending + * interrupts between the enable and disable. */ local_irq_enable(); + local_irq_disable(); - /* - * We do local_irq_enable() before calling guest_exit() so - * that if a timer interrupt hits while running the guest - * we account that tick as being spent in the guest. We - * enable preemption after calling guest_exit() so that if - * we get preempted we make sure ticks after that is not - * counted as guest time. 
- */ - guest_exit(); + guest_timing_exit_irqoff(); + + local_irq_enable(); preempt_enable(); diff --git a/arch/riscv/kvm/vcpu_sbi_base.c b/arch/riscv/kvm/vcpu_sbi_base.c index 4ecf377f483b866bfa4c09144fda662efd6de5f4..48f431091cdbc197f739de2a221b53ccc254ccbe 100644 --- a/arch/riscv/kvm/vcpu_sbi_base.c +++ b/arch/riscv/kvm/vcpu_sbi_base.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -32,7 +33,7 @@ static int kvm_sbi_ext_base_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, *out_val = KVM_SBI_IMPID; break; case SBI_EXT_BASE_GET_IMP_VERSION: - *out_val = 0; + *out_val = LINUX_VERSION_CODE; break; case SBI_EXT_BASE_PROBE_EXT: if ((cp->a0 >= SBI_EXT_EXPERIMENTAL_START && diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c index 00036b7f83b9aa0272eeaf62df23672d09d40da3..1bc0608a5bfd35afbba0d048d9e7fddec633a93b 100644 --- a/arch/riscv/kvm/vcpu_sbi_replace.c +++ b/arch/riscv/kvm/vcpu_sbi_replace.c @@ -82,7 +82,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run { int ret = 0; unsigned long i; - struct cpumask cm, hm; + struct cpumask cm; struct kvm_vcpu *tmp; struct kvm_cpu_context *cp = &vcpu->arch.guest_context; unsigned long hmask = cp->a0; @@ -90,7 +90,6 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run unsigned long funcid = cp->a6; cpumask_clear(&cm); - cpumask_clear(&hm); kvm_for_each_vcpu(i, tmp, vcpu->kvm) { if (hbase != -1UL) { if (tmp->vcpu_id < hbase) @@ -103,17 +102,15 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run cpumask_set_cpu(tmp->cpu, &cm); } - riscv_cpuid_to_hartid_mask(&cm, &hm); - switch (funcid) { case SBI_EXT_RFENCE_REMOTE_FENCE_I: - ret = sbi_remote_fence_i(cpumask_bits(&hm)); + ret = sbi_remote_fence_i(&cm); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: - ret = sbi_remote_hfence_vvma(cpumask_bits(&hm), cp->a2, cp->a3); + ret = sbi_remote_hfence_vvma(&cm, cp->a2, cp->a3); break; case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: - ret = sbi_remote_hfence_vvma_asid(cpumask_bits(&hm), cp->a2, + ret = sbi_remote_hfence_vvma_asid(&cm, cp->a2, cp->a3, cp->a4); break; case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA: diff --git a/arch/riscv/kvm/vcpu_sbi_v01.c b/arch/riscv/kvm/vcpu_sbi_v01.c index 4c7e13ec9ccc0079a20752b9ca364e2c4a1e9104..07e2de14433a634505dba18aa70242b3cdad3d36 100644 --- a/arch/riscv/kvm/vcpu_sbi_v01.c +++ b/arch/riscv/kvm/vcpu_sbi_v01.c @@ -38,7 +38,7 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, int i, ret = 0; u64 next_cycle; struct kvm_vcpu *rvcpu; - struct cpumask cm, hm; + struct cpumask cm; struct kvm *kvm = vcpu->kvm; struct kvm_cpu_context *cp = &vcpu->arch.guest_context; @@ -101,15 +101,12 @@ static int kvm_sbi_ext_v01_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, continue; cpumask_set_cpu(rvcpu->cpu, &cm); } - riscv_cpuid_to_hartid_mask(&cm, &hm); if (cp->a7 == SBI_EXT_0_1_REMOTE_FENCE_I) - ret = sbi_remote_fence_i(cpumask_bits(&hm)); + ret = sbi_remote_fence_i(&cm); else if (cp->a7 == SBI_EXT_0_1_REMOTE_SFENCE_VMA) - ret = sbi_remote_hfence_vvma(cpumask_bits(&hm), - cp->a1, cp->a2); + ret = sbi_remote_hfence_vvma(&cm, cp->a1, cp->a2); else - ret = sbi_remote_hfence_vvma_asid(cpumask_bits(&hm), - cp->a1, cp->a2, cp->a3); + ret = sbi_remote_hfence_vvma_asid(&cm, cp->a1, cp->a2, cp->a3); break; default: ret = -EINVAL; diff --git a/arch/riscv/kvm/vmid.c b/arch/riscv/kvm/vmid.c index 
807228f8f409662b285e3f334115ba7cb2b14bc6..2fa4f7b1813dfab700083172e190bba6db738e13 100644 --- a/arch/riscv/kvm/vmid.c +++ b/arch/riscv/kvm/vmid.c @@ -67,7 +67,6 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu) { unsigned long i; struct kvm_vcpu *v; - struct cpumask hmask; struct kvm_vmid *vmid = &vcpu->kvm->arch.vmid; if (!kvm_riscv_stage2_vmid_ver_changed(vmid)) @@ -102,8 +101,7 @@ void kvm_riscv_stage2_vmid_update(struct kvm_vcpu *vcpu) * running, we force VM exits on all host CPUs using IPI and * flush all Guest TLBs. */ - riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask); - sbi_remote_hfence_gvma(cpumask_bits(&hmask), 0, 0); + sbi_remote_hfence_gvma(cpu_online_mask, 0, 0); } vmid->vmid = vmid_next; diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 89f81067e09edba0c9cacdafd41cc97ea2b0e19e..6cb7d96ad9c7bc5424185e82a8e60cc46975743d 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -67,10 +67,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local) */ smp_mb(); } else if (IS_ENABLED(CONFIG_RISCV_SBI)) { - cpumask_t hartid_mask; - - riscv_cpuid_to_hartid_mask(&others, &hartid_mask); - sbi_remote_fence_i(cpumask_bits(&hartid_mask)); + sbi_remote_fence_i(&others); } else { on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1); } diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index ea54cc0c91068efda8906e92902a5ef13829319d..7acbfbd14557e600ba69b888c7389fef64f72428 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -192,7 +192,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu) switch_mm_fast: csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | ((cntx & asid_mask) << SATP_ASID_SHIFT) | - SATP_MODE); + satp_mode); if (need_flush_tlb) local_flush_tlb_all(); @@ -201,7 +201,7 @@ switch_mm_fast: static void set_mm_noasid(struct mm_struct *mm) { /* Switch the page table and blindly nuke entire local TLB */ - csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | SATP_MODE); + csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | satp_mode); local_flush_tlb_all(); } diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c index 05978f78579ff04df43661fab98c79f198142d54..35484d830fd6d7fe0a2a521bc736b1c5883afa51 100644 --- a/arch/riscv/mm/extable.c +++ b/arch/riscv/mm/extable.c @@ -33,7 +33,7 @@ static inline void regs_set_gpr(struct pt_regs *regs, unsigned int offset, if (unlikely(offset > MAX_REG_OFFSET)) return; - if (!offset) + if (offset) *(unsigned long *)((unsigned long)regs + offset) = val; } @@ -43,8 +43,8 @@ static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex, int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data); - regs_set_gpr(regs, reg_err, -EFAULT); - regs_set_gpr(regs, reg_zero, 0); + regs_set_gpr(regs, reg_err * sizeof(unsigned long), -EFAULT); + regs_set_gpr(regs, reg_zero * sizeof(unsigned long), 0); regs->epc = get_ex_fixup(ex); return true; diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 0624c68331d82ec13857526f3ffcc33358e38e66..c27294128e182d77d8f0b7e29ac35b4549069943 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -37,13 +37,19 @@ EXPORT_SYMBOL(kernel_map); #define kernel_map (*(struct kernel_mapping *)XIP_FIXUP(&kernel_map)) #endif +#ifdef CONFIG_64BIT +u64 satp_mode = !IS_ENABLED(CONFIG_XIP_KERNEL) ? 
SATP_MODE_48 : SATP_MODE_39; +#else +u64 satp_mode = SATP_MODE_32; +#endif +EXPORT_SYMBOL(satp_mode); + +bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL); +EXPORT_SYMBOL(pgtable_l4_enabled); + phys_addr_t phys_ram_base __ro_after_init; EXPORT_SYMBOL(phys_ram_base); -#ifdef CONFIG_XIP_KERNEL -extern char _xiprom[], _exiprom[], __data_loc; -#endif - unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); @@ -53,15 +59,6 @@ extern char _start[]; void *_dtb_early_va __initdata; uintptr_t _dtb_early_pa __initdata; -struct pt_alloc_ops { - pte_t *(*get_pte_virt)(phys_addr_t pa); - phys_addr_t (*alloc_pte)(uintptr_t va); -#ifndef __PAGETABLE_PMD_FOLDED - pmd_t *(*get_pmd_virt)(phys_addr_t pa); - phys_addr_t (*alloc_pmd)(uintptr_t va); -#endif -}; - static phys_addr_t dma32_phys_limit __initdata; static void __init zone_sizes_init(void) @@ -102,10 +99,14 @@ static void __init print_vm_layout(void) (unsigned long)VMALLOC_END); print_mlm("lowmem", (unsigned long)PAGE_OFFSET, (unsigned long)high_memory); -#ifdef CONFIG_64BIT - print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR, - (unsigned long)ADDRESS_SPACE_END); + if (IS_ENABLED(CONFIG_64BIT)) { +#ifdef CONFIG_KASAN + print_mlm("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END); #endif + + print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR, + (unsigned long)ADDRESS_SPACE_END); + } } #else static void print_vm_layout(void) { } @@ -130,18 +131,8 @@ void __init mem_init(void) print_vm_layout(); } -/* - * The default maximal physical memory size is -PAGE_OFFSET for 32-bit kernel, - * whereas for 64-bit kernel, the end of the virtual address space is occupied - * by the modules/BPF/kernel mappings which reduces the available size of the - * linear mapping. - * Limit the memory size via mem. - */ -#ifdef CONFIG_64BIT -static phys_addr_t memory_limit = -PAGE_OFFSET - SZ_4G; -#else -static phys_addr_t memory_limit = -PAGE_OFFSET; -#endif +/* Limit the memory size via mem. */ +static phys_addr_t memory_limit; static int __init early_mem(char *p) { @@ -162,35 +153,31 @@ early_param("mem", early_mem); static void __init setup_bootmem(void) { phys_addr_t vmlinux_end = __pa_symbol(&_end); - phys_addr_t vmlinux_start = __pa_symbol(&_start); - phys_addr_t __maybe_unused max_mapped_addr; - phys_addr_t phys_ram_end; + phys_addr_t max_mapped_addr; + phys_addr_t phys_ram_end, vmlinux_start; -#ifdef CONFIG_XIP_KERNEL - vmlinux_start = __pa_symbol(&_sdata); -#endif + if (IS_ENABLED(CONFIG_XIP_KERNEL)) + vmlinux_start = __pa_symbol(&_sdata); + else + vmlinux_start = __pa_symbol(&_start); memblock_enforce_memory_limit(memory_limit); - /* - * Reserve from the start of the kernel to the end of the kernel - */ -#if defined(CONFIG_64BIT) && defined(CONFIG_STRICT_KERNEL_RWX) /* * Make sure we align the reservation on PMD_SIZE since we will * map the kernel in the linear mapping as read-only: we do not want * any allocation to happen between _end and the next pmd aligned page. 
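+ * + * For example, with PMD_SIZE = 2MB, a vmlinux_end of 0x80a01234 is rounded up to 0x80c00000 before the memblock_reserve() below.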
*/ - vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK; -#endif + if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) + vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK; + /* + * Reserve from the start of the kernel to the end of the kernel + */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - phys_ram_end = memblock_end_of_DRAM(); -#ifndef CONFIG_XIP_KERNEL - phys_ram_base = memblock_start_of_DRAM(); -#endif -#ifndef CONFIG_64BIT + if (!IS_ENABLED(CONFIG_XIP_KERNEL)) + phys_ram_base = memblock_start_of_DRAM(); /* * memblock allocator is not aware of the fact that last 4K bytes of * the addressable memory can not be mapped because of IS_ERR_VALUE @@ -200,10 +187,11 @@ static void __init setup_bootmem(void) * address space is occupied by the kernel mapping then this check must * be done as soon as the kernel mapping base address is determined. */ - max_mapped_addr = __pa(~(ulong)0); - if (max_mapped_addr == (phys_ram_end - 1)) - memblock_set_current_limit(max_mapped_addr - 4096); -#endif + if (!IS_ENABLED(CONFIG_64BIT)) { + max_mapped_addr = __pa(~(ulong)0); + if (max_mapped_addr == (phys_ram_end - 1)) + memblock_set_current_limit(max_mapped_addr - 4096); + } min_low_pfn = PFN_UP(phys_ram_base); max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); @@ -229,13 +217,7 @@ static void __init setup_bootmem(void) } #ifdef CONFIG_MMU -static struct pt_alloc_ops _pt_ops __initdata; - -#ifdef CONFIG_XIP_KERNEL -#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&_pt_ops)) -#else -#define pt_ops _pt_ops -#endif +struct pt_alloc_ops pt_ops __initdata; unsigned long riscv_pfn_base __ro_after_init; EXPORT_SYMBOL(riscv_pfn_base); @@ -245,9 +227,12 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); +static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #ifdef CONFIG_XIP_KERNEL +#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops)) +#define riscv_pfn_base (*(unsigned long *)XIP_FIXUP(&riscv_pfn_base)) #define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir)) #define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte)) #define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir)) @@ -333,6 +318,16 @@ static pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); #define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd)) #endif /* CONFIG_XIP_KERNEL */ +static pud_t trampoline_pud[PTRS_PER_PUD] __page_aligned_bss; +static pud_t fixmap_pud[PTRS_PER_PUD] __page_aligned_bss; +static pud_t early_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); + +#ifdef CONFIG_XIP_KERNEL +#define trampoline_pud ((pud_t *)XIP_FIXUP(trampoline_pud)) +#define fixmap_pud ((pud_t *)XIP_FIXUP(fixmap_pud)) +#define early_pud ((pud_t *)XIP_FIXUP(early_pud)) +#endif /* CONFIG_XIP_KERNEL */ + static pmd_t *__init get_pmd_virt_early(phys_addr_t pa) { /* Before MMU is enabled */ @@ -352,7 +347,7 @@ static pmd_t *__init get_pmd_virt_late(phys_addr_t pa) static phys_addr_t __init alloc_pmd_early(uintptr_t va) { - BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT); + BUG_ON((va - kernel_map.virt_addr) >> PUD_SHIFT); return (uintptr_t)early_pmd; } @@ -399,21 +394,97 @@ static void __init create_pmd_mapping(pmd_t *pmdp, create_pte_mapping(ptep, va, pa, sz, prot); } -#define pgd_next_t pmd_t -#define alloc_pgd_next(__va) pt_ops.alloc_pmd(__va) -#define 
get_pgd_next_virt(__pa) pt_ops.get_pmd_virt(__pa) +static pud_t *__init get_pud_virt_early(phys_addr_t pa) +{ + return (pud_t *)((uintptr_t)pa); +} + +static pud_t *__init get_pud_virt_fixmap(phys_addr_t pa) +{ + clear_fixmap(FIX_PUD); + return (pud_t *)set_fixmap_offset(FIX_PUD, pa); +} + +static pud_t *__init get_pud_virt_late(phys_addr_t pa) +{ + return (pud_t *)__va(pa); +} + +static phys_addr_t __init alloc_pud_early(uintptr_t va) +{ + /* Only one PUD is available for early mapping */ + BUG_ON((va - kernel_map.virt_addr) >> PGDIR_SHIFT); + + return (uintptr_t)early_pud; +} + +static phys_addr_t __init alloc_pud_fixmap(uintptr_t va) +{ + return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); +} + +static phys_addr_t alloc_pud_late(uintptr_t va) +{ + unsigned long vaddr; + + vaddr = __get_free_page(GFP_KERNEL); + BUG_ON(!vaddr); + return __pa(vaddr); +} + +static void __init create_pud_mapping(pud_t *pudp, + uintptr_t va, phys_addr_t pa, + phys_addr_t sz, pgprot_t prot) +{ + pmd_t *nextp; + phys_addr_t next_phys; + uintptr_t pud_index = pud_index(va); + + if (sz == PUD_SIZE) { + if (pud_val(pudp[pud_index]) == 0) + pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot); + return; + } + + if (pud_val(pudp[pud_index]) == 0) { + next_phys = pt_ops.alloc_pmd(va); + pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE); + nextp = pt_ops.get_pmd_virt(next_phys); + memset(nextp, 0, PAGE_SIZE); + } else { + next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index])); + nextp = pt_ops.get_pmd_virt(next_phys); + } + + create_pmd_mapping(nextp, va, pa, sz, prot); +} + +#define pgd_next_t pud_t +#define alloc_pgd_next(__va) (pgtable_l4_enabled ? \ + pt_ops.alloc_pud(__va) : pt_ops.alloc_pmd(__va)) +#define get_pgd_next_virt(__pa) (pgtable_l4_enabled ? \ + pt_ops.get_pud_virt(__pa) : (pgd_next_t *)pt_ops.get_pmd_virt(__pa)) #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ - create_pmd_mapping(__nextp, __va, __pa, __sz, __prot) -#define fixmap_pgd_next fixmap_pmd + (pgtable_l4_enabled ? \ + create_pud_mapping(__nextp, __va, __pa, __sz, __prot) : \ + create_pmd_mapping((pmd_t *)__nextp, __va, __pa, __sz, __prot)) +#define fixmap_pgd_next (pgtable_l4_enabled ? \ + (uintptr_t)fixmap_pud : (uintptr_t)fixmap_pmd) +#define trampoline_pgd_next (pgtable_l4_enabled ? \ + (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd) +#define early_dtb_pgd_next (pgtable_l4_enabled ? 
\ + (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd) #else #define pgd_next_t pte_t #define alloc_pgd_next(__va) pt_ops.alloc_pte(__va) #define get_pgd_next_virt(__pa) pt_ops.get_pte_virt(__pa) #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ create_pte_mapping(__nextp, __va, __pa, __sz, __prot) -#define fixmap_pgd_next fixmap_pte +#define fixmap_pgd_next ((uintptr_t)fixmap_pte) +#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd) +#define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) #define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) -#endif +#endif /* __PAGETABLE_PMD_FOLDED */ void __init create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, @@ -452,6 +523,9 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) } #ifdef CONFIG_XIP_KERNEL +#define phys_ram_base (*(phys_addr_t *)XIP_FIXUP(&phys_ram_base)) +extern char _xiprom[], _exiprom[], __data_loc; + /* called from head.S with MMU off */ asmlinkage void __init __copy_data(void) { @@ -500,6 +574,57 @@ static __init pgprot_t pgprot_from_va(uintptr_t va) } #endif /* CONFIG_STRICT_KERNEL_RWX */ +#ifdef CONFIG_64BIT +static void __init disable_pgtable_l4(void) +{ + pgtable_l4_enabled = false; + kernel_map.page_offset = PAGE_OFFSET_L3; + satp_mode = SATP_MODE_39; +} + +/* + * There is a simple way to determine if 4-level is supported by the + * underlying hardware: establish 1:1 mapping in 4-level page table mode + * then read SATP to see if the configuration was taken into account + * meaning sv48 is supported. + */ +static __init void set_satp_mode(void) +{ + u64 identity_satp, hw_satp; + uintptr_t set_satp_mode_pmd; + + set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK; + create_pgd_mapping(early_pg_dir, + set_satp_mode_pmd, (uintptr_t)early_pud, + PGDIR_SIZE, PAGE_TABLE); + create_pud_mapping(early_pud, + set_satp_mode_pmd, (uintptr_t)early_pmd, + PUD_SIZE, PAGE_TABLE); + /* Handle the case where set_satp_mode straddles 2 PMDs */ + create_pmd_mapping(early_pmd, + set_satp_mode_pmd, set_satp_mode_pmd, + PMD_SIZE, PAGE_KERNEL_EXEC); + create_pmd_mapping(early_pmd, + set_satp_mode_pmd + PMD_SIZE, + set_satp_mode_pmd + PMD_SIZE, + PMD_SIZE, PAGE_KERNEL_EXEC); + + identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode; + + local_flush_tlb_all(); + csr_write(CSR_SATP, identity_satp); + hw_satp = csr_swap(CSR_SATP, 0ULL); + local_flush_tlb_all(); + + if (hw_satp != identity_satp) + disable_pgtable_l4(); + + memset(early_pg_dir, 0, PAGE_SIZE); + memset(early_pud, 0, PAGE_SIZE); + memset(early_pmd, 0, PAGE_SIZE); +} +#endif + /* * setup_vm() is called from head.S with MMU-off. * @@ -564,10 +689,15 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa) uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1); create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, - IS_ENABLED(CONFIG_64BIT) ? (uintptr_t)early_dtb_pmd : pa, + IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa, PGDIR_SIZE, IS_ENABLED(CONFIG_64BIT) ? 
PAGE_TABLE : PAGE_KERNEL); + if (pgtable_l4_enabled) { + create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA, + (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE); + } + if (IS_ENABLED(CONFIG_64BIT)) { create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, pa, PMD_SIZE, PAGE_KERNEL); @@ -589,11 +719,64 @@ static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa) dtb_early_pa = dtb_pa; } +/* + * MMU is not enabled, the page tables are allocated directly using + * early_pmd/pud/p4d and the address returned is the physical one. + */ +void __init pt_ops_set_early(void) +{ + pt_ops.alloc_pte = alloc_pte_early; + pt_ops.get_pte_virt = get_pte_virt_early; +#ifndef __PAGETABLE_PMD_FOLDED + pt_ops.alloc_pmd = alloc_pmd_early; + pt_ops.get_pmd_virt = get_pmd_virt_early; + pt_ops.alloc_pud = alloc_pud_early; + pt_ops.get_pud_virt = get_pud_virt_early; +#endif +} + +/* + * MMU is enabled but page table setup is not complete yet. + * fixmap page table alloc functions must be used as a means to temporarily + * map the allocated physical pages since the linear mapping does not exist yet. + * + * Note that this is called with MMU disabled, hence kernel_mapping_pa_to_va, + * but it will be used as described above. + */ +void __init pt_ops_set_fixmap(void) +{ + pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap); + pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap); +#ifndef __PAGETABLE_PMD_FOLDED + pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap); + pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap); + pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap); + pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap); +#endif +} + +/* + * MMU is enabled and page table setup is complete, so from now, we can use + * generic page allocation functions to setup page table. + */ +void __init pt_ops_set_late(void) +{ + pt_ops.alloc_pte = alloc_pte_late; + pt_ops.get_pte_virt = get_pte_virt_late; +#ifndef __PAGETABLE_PMD_FOLDED + pt_ops.alloc_pmd = alloc_pmd_late; + pt_ops.get_pmd_virt = get_pmd_virt_late; + pt_ops.alloc_pud = alloc_pud_late; + pt_ops.get_pud_virt = get_pud_virt_late; +#endif +} + asmlinkage void __init setup_vm(uintptr_t dtb_pa) { pmd_t __maybe_unused fix_bmap_spmd, fix_bmap_epmd; kernel_map.virt_addr = KERNEL_LINK_ADDR; + kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL); #ifdef CONFIG_XIP_KERNEL kernel_map.xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR; @@ -608,11 +791,24 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) kernel_map.phys_addr = (uintptr_t)(&_start); kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr; #endif + +#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL) + set_satp_mode(); +#endif + kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr; kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr); + /* + * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit + * kernel, whereas for 64-bit kernel, the end of the virtual address + * space is occupied by the modules/BPF/kernel mappings which reduces + * the available size of the linear mapping. + */ + memory_limit = KERN_VIRT_SIZE - (IS_ENABLED(CONFIG_64BIT) ? 
SZ_4G : 0); + /* Sanity check alignment and size */ BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); BUG_ON((kernel_map.phys_addr % PMD_SIZE) != 0); @@ -625,23 +821,25 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K); #endif - pt_ops.alloc_pte = alloc_pte_early; - pt_ops.get_pte_virt = get_pte_virt_early; -#ifndef __PAGETABLE_PMD_FOLDED - pt_ops.alloc_pmd = alloc_pmd_early; - pt_ops.get_pmd_virt = get_pmd_virt_early; -#endif + pt_ops_set_early(); + /* Setup early PGD for fixmap */ create_pgd_mapping(early_pg_dir, FIXADDR_START, - (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); + fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE); #ifndef __PAGETABLE_PMD_FOLDED - /* Setup fixmap PMD */ + /* Setup fixmap PUD and PMD */ + if (pgtable_l4_enabled) + create_pud_mapping(fixmap_pud, FIXADDR_START, + (uintptr_t)fixmap_pmd, PUD_SIZE, PAGE_TABLE); create_pmd_mapping(fixmap_pmd, FIXADDR_START, (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE); /* Setup trampoline PGD and PMD */ create_pgd_mapping(trampoline_pg_dir, kernel_map.virt_addr, - (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE); + trampoline_pgd_next, PGDIR_SIZE, PAGE_TABLE); + if (pgtable_l4_enabled) + create_pud_mapping(trampoline_pud, kernel_map.virt_addr, + (uintptr_t)trampoline_pmd, PUD_SIZE, PAGE_TABLE); #ifdef CONFIG_XIP_KERNEL create_pmd_mapping(trampoline_pmd, kernel_map.virt_addr, kernel_map.xiprom, PMD_SIZE, PAGE_KERNEL_EXEC); @@ -669,7 +867,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap * range can not span multiple pmds. */ - BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) + BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); #ifndef __PAGETABLE_PMD_FOLDED @@ -694,6 +892,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN); } #endif + + pt_ops_set_fixmap(); } static void __init setup_vm_final(void) @@ -702,16 +902,6 @@ static void __init setup_vm_final(void) phys_addr_t pa, start, end; u64 i; - /** - * MMU is enabled at this point. But page table setup is not complete yet. 
- * fixmap page table alloc functions should be used at this point - */ - pt_ops.alloc_pte = alloc_pte_fixmap; - pt_ops.get_pte_virt = get_pte_virt_fixmap; -#ifndef __PAGETABLE_PMD_FOLDED - pt_ops.alloc_pmd = alloc_pmd_fixmap; - pt_ops.get_pmd_virt = get_pmd_virt_fixmap; -#endif /* Setup swapper PGD for fixmap */ create_pgd_mapping(swapper_pg_dir, FIXADDR_START, __pa_symbol(fixmap_pgd_next), @@ -736,26 +926,24 @@ static void __init setup_vm_final(void) } } -#ifdef CONFIG_64BIT /* Map the kernel */ - create_kernel_page_table(swapper_pg_dir, false); + if (IS_ENABLED(CONFIG_64BIT)) + create_kernel_page_table(swapper_pg_dir, false); + +#ifdef CONFIG_KASAN + kasan_swapper_init(); #endif /* Clear fixmap PTE and PMD mappings */ clear_fixmap(FIX_PTE); clear_fixmap(FIX_PMD); + clear_fixmap(FIX_PUD); /* Move to swapper page table */ - csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE); + csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | satp_mode); local_flush_tlb_all(); - /* generic page allocation functions must be used to setup page table */ - pt_ops.alloc_pte = alloc_pte_late; - pt_ops.get_pte_virt = get_pte_virt_late; -#ifndef __PAGETABLE_PMD_FOLDED - pt_ops.alloc_pmd = alloc_pmd_late; - pt_ops.get_pmd_virt = get_pmd_virt_late; -#endif + pt_ops_set_late(); } #else asmlinkage void __init setup_vm(uintptr_t dtb_pa) @@ -791,12 +979,10 @@ static void __init reserve_crashkernel(void) * since it doesn't make much sense and we have limited memory * resources. */ -#ifdef CONFIG_CRASH_DUMP if (is_kdump_kernel()) { pr_info("crashkernel: ignoring reservation request\n"); return; } -#endif ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base); diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 54294f83513d1b024fb232c3aee2373974bff362..f61f7ca6fe0fde512c6909eeec795f424123c0f6 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -11,45 +11,27 @@ #include #include -extern pgd_t early_pg_dir[PTRS_PER_PGD]; -asmlinkage void __init kasan_early_init(void) -{ - uintptr_t i; - pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START); - - BUILD_BUG_ON(KASAN_SHADOW_OFFSET != - KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT))); - - for (i = 0; i < PTRS_PER_PTE; ++i) - set_pte(kasan_early_shadow_pte + i, - mk_pte(virt_to_page(kasan_early_shadow_page), - PAGE_KERNEL)); - - for (i = 0; i < PTRS_PER_PMD; ++i) - set_pmd(kasan_early_shadow_pmd + i, - pfn_pmd(PFN_DOWN - (__pa((uintptr_t) kasan_early_shadow_pte)), - __pgprot(_PAGE_TABLE))); - - for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END; - i += PGDIR_SIZE, ++pgd) - set_pgd(pgd, - pfn_pgd(PFN_DOWN - (__pa(((uintptr_t) kasan_early_shadow_pmd))), - __pgprot(_PAGE_TABLE))); - - /* init for swapper_pg_dir */ - pgd = pgd_offset_k(KASAN_SHADOW_START); - - for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END; - i += PGDIR_SIZE, ++pgd) - set_pgd(pgd, - pfn_pgd(PFN_DOWN - (__pa(((uintptr_t) kasan_early_shadow_pmd))), - __pgprot(_PAGE_TABLE))); +/* + * Kasan shadow region must lie at a fixed address across sv39, sv48 and sv57 + * which is right before the kernel. + * + * For sv39, the region is aligned on PGDIR_SIZE so we only need to populate + * the page global directory with kasan_early_shadow_pmd. 
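+ * (Editorial aside: under sv39 a single PGD entry maps 1 GiB
+ * (PGDIR_SHIFT = 30), while under sv48 it maps 512 GiB (PGDIR_SHIFT = 39);
+ * the shadow region keeps its fixed sv39-compatible placement, which is why
+ * its boundaries no longer fall on PGD-entry boundaries once sv48 is
+ * enabled.)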
+ * + * For sv48 and sv57, the region is not aligned on PGDIR_SIZE so the mapping + * must be divided as follows: + * - the first PGD entry, although incomplete, is populated with + * kasan_early_shadow_pud/p4d + * - the PGD entries in the middle are populated with kasan_early_shadow_pud/p4d + * - the last PGD entry is shared with the kernel mapping so populated at the + * lower levels pud/p4d + * + * In addition, when shallow populating a kasan region (for example vmalloc), + * this region may also not be aligned on PGDIR size, so we must go down to the + * pud level too. + */ - local_flush_tlb_all(); -} +extern pgd_t early_pg_dir[PTRS_PER_PGD]; static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end) { @@ -73,15 +55,19 @@ static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE)); } -static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long end) +static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned long end) { phys_addr_t phys_addr; pmd_t *pmdp, *base_pmd; unsigned long next; - base_pmd = (pmd_t *)pgd_page_vaddr(*pgd); - if (base_pmd == lm_alias(kasan_early_shadow_pmd)) + if (pud_none(*pud)) { base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); + } else { + base_pmd = (pmd_t *)pud_pgtable(*pud); + if (base_pmd == lm_alias(kasan_early_shadow_pmd)) + base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE); + } pmdp = base_pmd + pmd_index(vaddr); @@ -105,59 +91,207 @@ static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned * it entirely, memblock could allocate a page at a physical address * where KASAN is not populated yet and then we'd get a page fault. */ - set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); + set_pud(pud, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); +} + +static void __init kasan_populate_pud(pgd_t *pgd, + unsigned long vaddr, unsigned long end, + bool early) +{ + phys_addr_t phys_addr; + pud_t *pudp, *base_pud; + unsigned long next; + + if (early) { + /* + * We can't use pgd_page_vaddr here as it would return a linear + * mapping address but it is not mapped yet, but when populating + * early_pg_dir, we need the physical address and when populating + * swapper_pg_dir, we need the kernel virtual address so use + * pt_ops facility. + */ + base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd))); + } else { + base_pud = (pud_t *)pgd_page_vaddr(*pgd); + if (base_pud == lm_alias(kasan_early_shadow_pud)) + base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE); + } + + pudp = base_pud + pud_index(vaddr); + + do { + next = pud_addr_end(vaddr, end); + + if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) { + if (early) { + phys_addr = __pa(((uintptr_t)kasan_early_shadow_pmd)); + set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE)); + continue; + } else { + phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE); + if (phys_addr) { + set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL)); + continue; + } + } + } + + kasan_populate_pmd(pudp, vaddr, next); + } while (pudp++, vaddr = next, vaddr != end); + + /* + * Wait for the whole PGD to be populated before setting the PGD in + * the page table, otherwise, if we did set the PGD before populating + * it entirely, memblock could allocate a page at a physical address + * where KASAN is not populated yet and then we'd get a page fault. 
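+ * (Editorial note: in the early case base_pud was read back from *pgd
+ * above, i.e. the PGD entry is already populated, so there is nothing to
+ * publish here -- hence the !early guard below.)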
+ */ + if (!early) + set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE)); } -static void __init kasan_populate_pgd(unsigned long vaddr, unsigned long end) +#define kasan_early_shadow_pgd_next (pgtable_l4_enabled ? \ + (uintptr_t)kasan_early_shadow_pud : \ + (uintptr_t)kasan_early_shadow_pmd) +#define kasan_populate_pgd_next(pgdp, vaddr, next, early) \ + (pgtable_l4_enabled ? \ + kasan_populate_pud(pgdp, vaddr, next, early) : \ + kasan_populate_pmd((pud_t *)pgdp, vaddr, next)) + +static void __init kasan_populate_pgd(pgd_t *pgdp, + unsigned long vaddr, unsigned long end, + bool early) { phys_addr_t phys_addr; - pgd_t *pgdp = pgd_offset_k(vaddr); unsigned long next; do { next = pgd_addr_end(vaddr, end); - /* - * pgdp can't be none since kasan_early_init initialized all KASAN - * shadow region with kasan_early_shadow_pmd: if this is stillthe case, - * that means we can try to allocate a hugepage as a replacement. - */ - if (pgd_page_vaddr(*pgdp) == (unsigned long)lm_alias(kasan_early_shadow_pmd) && - IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) { - phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE); - if (phys_addr) { - set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL)); + if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) { + if (early) { + phys_addr = __pa((uintptr_t)kasan_early_shadow_pgd_next); + set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE)); continue; + } else if (pgd_page_vaddr(*pgdp) == + (unsigned long)lm_alias(kasan_early_shadow_pgd_next)) { + /* + * pgdp can't be none since kasan_early_init + * initialized all KASAN shadow region with + * kasan_early_shadow_pud: if this is still the + * case, that means we can try to allocate a + * hugepage as a replacement. + */ + phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE); + if (phys_addr) { + set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL)); + continue; + } } } - kasan_populate_pmd(pgdp, vaddr, next); + kasan_populate_pgd_next(pgdp, vaddr, next, early); } while (pgdp++, vaddr = next, vaddr != end); } +asmlinkage void __init kasan_early_init(void) +{ + uintptr_t i; + + BUILD_BUG_ON(KASAN_SHADOW_OFFSET != + KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT))); + + for (i = 0; i < PTRS_PER_PTE; ++i) + set_pte(kasan_early_shadow_pte + i, + mk_pte(virt_to_page(kasan_early_shadow_page), + PAGE_KERNEL)); + + for (i = 0; i < PTRS_PER_PMD; ++i) + set_pmd(kasan_early_shadow_pmd + i, + pfn_pmd(PFN_DOWN + (__pa((uintptr_t)kasan_early_shadow_pte)), + PAGE_TABLE)); + + if (pgtable_l4_enabled) { + for (i = 0; i < PTRS_PER_PUD; ++i) + set_pud(kasan_early_shadow_pud + i, + pfn_pud(PFN_DOWN + (__pa(((uintptr_t)kasan_early_shadow_pmd))), + PAGE_TABLE)); + } + + kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START), + KASAN_SHADOW_START, KASAN_SHADOW_END, true); + + local_flush_tlb_all(); +} + +void __init kasan_swapper_init(void) +{ + kasan_populate_pgd(pgd_offset_k(KASAN_SHADOW_START), + KASAN_SHADOW_START, KASAN_SHADOW_END, true); + + local_flush_tlb_all(); +} + static void __init kasan_populate(void *start, void *end) { unsigned long vaddr = (unsigned long)start & PAGE_MASK; unsigned long vend = PAGE_ALIGN((unsigned long)end); - kasan_populate_pgd(vaddr, vend); + kasan_populate_pgd(pgd_offset_k(vaddr), vaddr, vend, false); local_flush_tlb_all(); memset(start, KASAN_SHADOW_INIT, end - start); } +static void __init kasan_shallow_populate_pud(pgd_t *pgdp, + unsigned long vaddr, unsigned long end, + bool kasan_populate) +{ + unsigned long next; + pud_t *pudp, 
*base_pud; + pmd_t *base_pmd; + bool is_kasan_pmd; + + base_pud = (pud_t *)pgd_page_vaddr(*pgdp); + pudp = base_pud + pud_index(vaddr); + + if (kasan_populate) + memcpy(base_pud, (void *)kasan_early_shadow_pgd_next, + sizeof(pud_t) * PTRS_PER_PUD); + + do { + next = pud_addr_end(vaddr, end); + is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd)); + + if (is_kasan_pmd) { + base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); + } + } while (pudp++, vaddr = next, vaddr != end); +} + static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end) { unsigned long next; void *p; pgd_t *pgd_k = pgd_offset_k(vaddr); + bool is_kasan_pgd_next; do { next = pgd_addr_end(vaddr, end); - if (pgd_page_vaddr(*pgd_k) == (unsigned long)lm_alias(kasan_early_shadow_pmd)) { + is_kasan_pgd_next = (pgd_page_vaddr(*pgd_k) == + (unsigned long)lm_alias(kasan_early_shadow_pgd_next)); + + if (is_kasan_pgd_next) { p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE)); } + + if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) + continue; + + kasan_shallow_populate_pud(pgd_k, vaddr, next, is_kasan_pgd_next); } while (pgd_k++, vaddr = next, vaddr != end); } diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 64f8201237c24e736d18942d27b295b580efad61..37ed760d007c3eef9c302adda361cd4ad7d1db9d 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -32,7 +32,6 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, unsigned long size, unsigned long stride) { struct cpumask *cmask = mm_cpumask(mm); - struct cpumask hmask; unsigned int cpuid; bool broadcast; @@ -46,9 +45,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, unsigned long asid = atomic_long_read(&mm->context.id); if (broadcast) { - riscv_cpuid_to_hartid_mask(cmask, &hmask); - sbi_remote_sfence_vma_asid(cpumask_bits(&hmask), - start, size, asid); + sbi_remote_sfence_vma_asid(cmask, start, size, asid); } else if (size <= stride) { local_flush_tlb_page_asid(start, asid); } else { @@ -56,9 +53,7 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, } } else { if (broadcast) { - riscv_cpuid_to_hartid_mask(cmask, &hmask); - sbi_remote_sfence_vma(cpumask_bits(&hmask), - start, size); + sbi_remote_sfence_vma(cmask, start, size); } else if (size <= stride) { local_flush_tlb_page(start); } else { diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 293dd6e171ed5eaa90a38df426ccd09486e61119..0bcda99d1d682ebc0abb616c731e2c5964566585 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -497,7 +497,7 @@ static int add_exception_handler(const struct bpf_insn *insn, offset = pc - (long)&ex->insn; if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN)) return -ERANGE; - ex->insn = pc; + ex->insn = offset; /* * Since the extable follows the program, the fixup offset is always diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f6a9475cbc8c5473ac30f09485ed5918a7832db2..be9f39fd06df6741e28ae08333b819150873ea46 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -127,7 +127,6 @@ config S390 select GENERIC_CPU_AUTOPROBE select GENERIC_CPU_VULNERABILITIES select GENERIC_ENTRY - select GENERIC_FIND_FIRST_BIT select GENERIC_GETTIMEOFDAY select GENERIC_PTDUMP select GENERIC_SMP_IDLE_THREAD @@ -946,6 +945,9 @@ config S390_GUEST endmenu +config 
S390_MODULES_SANITY_TEST_HELPERS + def_bool n + menu "Selftests" config S390_UNWIND_SELFTEST @@ -972,4 +974,16 @@ config S390_KPROBES_SANITY_TEST Say N if you are unsure. +config S390_MODULES_SANITY_TEST + def_tristate n + depends on KUNIT + default KUNIT_ALL_TESTS + prompt "Enable s390 specific modules tests" + select S390_MODULES_SANITY_TEST_HELPERS + help + This option enables an s390 specific modules test. This option is + not useful for distributions or general kernels, but only for + kernel developers working on architecture code. + + Say N if you are unsure. endmenu diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 354e51dcb3e2779fc5c3ef5daf9ce08e7fa34f2f..498bed9b261b876a52122ec415515275b58cdaf5 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -63,6 +63,7 @@ CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_S390_UNWIND_SELFTEST=m CONFIG_S390_KPROBES_SANITY_TEST=m +CONFIG_S390_MODULES_SANITY_TEST=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y CONFIG_STATIC_KEYS_SELFTEST=y @@ -96,8 +97,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_CLEANCACHE=y -CONFIG_FRONTSWAP=y CONFIG_CMA_DEBUG=y CONFIG_CMA_DEBUGFS=y CONFIG_CMA_SYSFS=y @@ -110,6 +109,7 @@ CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y CONFIG_GUP_TEST=y +CONFIG_ANON_VMA_NAME=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -117,7 +117,6 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_NET_SWITCHDEV=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -186,7 +185,6 @@ CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m CONFIG_NF_TABLES_INET=y CONFIG_NFT_CT=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m @@ -392,6 +390,7 @@ CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=m +CONFIG_NET_SWITCHDEV=y CONFIG_CGROUP_NET_PRIO=y CONFIG_NET_PKTGEN=m CONFIG_PCI=y @@ -401,6 +400,7 @@ CONFIG_PCI_IOV=y CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_SAFE=y CONFIG_CONNECTOR=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m @@ -502,6 +502,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_ENGLEDER is not set # CONFIG_NET_VENDOR_EZCHIP is not set # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set @@ -512,7 +513,6 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y -CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set @@ -543,6 +543,7 @@ CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_TEHUTI is not set # CONFIG_NET_VENDOR_TI is not set +# CONFIG_NET_VENDOR_VERTEXCOM is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_NET_VENDOR_XILINX is not set @@ -593,6 +594,7 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -757,9 +759,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -775,6 +774,8 @@ 
CONFIG_CRYPTO_GHASH_S390=m CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_CRC32_SELFTEST=y CONFIG_CRC4=m CONFIG_CRC7=m @@ -808,7 +809,6 @@ CONFIG_SLUB_DEBUG_ON=y CONFIG_SLUB_STATS=y CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_VM=y -CONFIG_DEBUG_VM_VMACACHE=y CONFIG_DEBUG_VM_PGFLAGS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m @@ -820,12 +820,11 @@ CONFIG_PANIC_ON_OOPS=y CONFIG_DETECT_HUNG_TASK=y CONFIG_WQ_WATCHDOG=y CONFIG_TEST_LOCKUP=m -CONFIG_DEBUG_TIMEKEEPING=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y -CONFIG_DEBUG_LOCKDEP=y CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_DEBUG_LOCKING_API_SELFTESTS=y +CONFIG_DEBUG_IRQFLAGS=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_BUG_ON_DATA_CORRUPTION=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 8dee6c3782f35caafa340f656d3b9ccf5dff31d1..61e36b999f67e631038137d4c648510f84ef3ff3 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -61,6 +61,7 @@ CONFIG_APPLDATA_BASE=y CONFIG_KVM=m CONFIG_S390_UNWIND_SELFTEST=m CONFIG_S390_KPROBES_SANITY_TEST=m +CONFIG_S390_MODULES_SANITY_TEST=m CONFIG_KPROBES=y CONFIG_JUMP_LABEL=y # CONFIG_GCC_PLUGINS is not set @@ -91,8 +92,6 @@ CONFIG_MEMORY_HOTPLUG=y CONFIG_MEMORY_HOTREMOVE=y CONFIG_KSM=y CONFIG_TRANSPARENT_HUGEPAGE=y -CONFIG_CLEANCACHE=y -CONFIG_FRONTSWAP=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=7 CONFIG_MEM_SOFT_DIRTY=y @@ -102,6 +101,7 @@ CONFIG_ZSMALLOC_STAT=y CONFIG_DEFERRED_STRUCT_PAGE_INIT=y CONFIG_IDLE_PAGE_TRACKING=y CONFIG_PERCPU_STATS=y +CONFIG_ANON_VMA_NAME=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_PACKET_DIAG=m @@ -109,7 +109,6 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_NET_SWITCHDEV=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -178,7 +177,6 @@ CONFIG_NF_CT_NETLINK_TIMEOUT=m CONFIG_NF_TABLES=m CONFIG_NF_TABLES_INET=y CONFIG_NFT_CT=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m CONFIG_NFT_NAT=m @@ -383,6 +381,7 @@ CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m CONFIG_VIRTIO_VSOCKETS=m CONFIG_NETLINK_DIAG=m +CONFIG_NET_SWITCHDEV=y CONFIG_CGROUP_NET_PRIO=y CONFIG_NET_PKTGEN=m CONFIG_PCI=y @@ -392,6 +391,7 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_UEVENT_HELPER=y CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_SAFE=y CONFIG_CONNECTOR=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m @@ -493,6 +493,7 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_DEC is not set # CONFIG_NET_VENDOR_DLINK is not set # CONFIG_NET_VENDOR_EMULEX is not set +# CONFIG_NET_VENDOR_ENGLEDER is not set # CONFIG_NET_VENDOR_EZCHIP is not set # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set @@ -503,7 +504,6 @@ CONFIG_NLMON=m CONFIG_MLX4_EN=m CONFIG_MLX5_CORE=m CONFIG_MLX5_CORE_EN=y -CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_MICROCHIP is not set # CONFIG_NET_VENDOR_MICROSEMI is not set @@ -534,6 +534,7 @@ CONFIG_MLX5_ESWITCH=y # CONFIG_NET_VENDOR_SYNOPSYS is not set # CONFIG_NET_VENDOR_TEHUTI is not set # CONFIG_NET_VENDOR_TI is not set +# CONFIG_NET_VENDOR_VERTEXCOM is not set # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_NET_VENDOR_XILINX is not set @@ -583,6 +584,7 @@ CONFIG_VIRTIO_BALLOON=m CONFIG_VIRTIO_INPUT=y CONFIG_VHOST_NET=m CONFIG_VHOST_VSOCK=m +# CONFIG_SURFACE_PLATFORMS is not set CONFIG_S390_CCW_IOMMU=y CONFIG_S390_AP_IOMMU=y CONFIG_EXT4_FS=y @@ -744,9 +746,6 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=m CONFIG_CRYPTO_USER_API_RNG=m 
CONFIG_CRYPTO_USER_API_AEAD=m CONFIG_CRYPTO_STATS=y -CONFIG_CRYPTO_LIB_BLAKE2S=m -CONFIG_CRYPTO_LIB_CURVE25519=m -CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_ZCRYPT=m CONFIG_PKEY=m CONFIG_CRYPTO_PAES_S390=m @@ -763,6 +762,8 @@ CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRYPTO_DEV_VIRTIO=m CONFIG_CORDIC=m CONFIG_PRIME_NUMBERS=m +CONFIG_CRYPTO_LIB_CURVE25519=m +CONFIG_CRYPTO_LIB_CHACHA20POLY1305=m CONFIG_CRC4=m CONFIG_CRC7=m CONFIG_CRC8=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index eed3b9acfa71aaf59f8fdd2a2dd58517c0f2e144..c55c668dc3c788ba11c6e83803c62ee50a4c75f5 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -1,6 +1,7 @@ # CONFIG_SWAP is not set CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y +CONFIG_BPF_SYSCALL=y # CONFIG_CPU_ISOLATION is not set # CONFIG_UTS_NS is not set # CONFIG_TIME_NS is not set @@ -34,6 +35,7 @@ CONFIG_NET=y # CONFIG_PCPU_DEV_REFCNT is not set # CONFIG_ETHTOOL_NETLINK is not set CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_SAFE=y CONFIG_BLK_DEV_RAM=y # CONFIG_DCSSBLK is not set # CONFIG_DASD is not set @@ -58,6 +60,7 @@ CONFIG_ZFCP=y # CONFIG_HID is not set # CONFIG_VIRTIO_MENU is not set # CONFIG_VHOST_MENU is not set +# CONFIG_SURFACE_PLATFORMS is not set # CONFIG_IOMMU_SUPPORT is not set # CONFIG_DNOTIFY is not set # CONFIG_INOTIFY_USER is not set diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index 33f973ff974420e94fddcd221d66740a9ad7cec3..e8f15dbb89d02c2d21dbec26e19193a9de2a5d31 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -20,6 +20,7 @@ static char local_guest[] = " "; static char all_guests[] = "* "; +static char *all_groups = all_guests; static char *guest_query; struct diag2fc_data { @@ -62,10 +63,11 @@ static int diag2fc(int size, char* query, void *addr) memcpy(parm_list.userid, query, NAME_LEN); ASCEBC(parm_list.userid, NAME_LEN); - parm_list.addr = (unsigned long) addr ; + memcpy(parm_list.aci_grp, all_groups, NAME_LEN); + ASCEBC(parm_list.aci_grp, NAME_LEN); + parm_list.addr = (unsigned long)addr; parm_list.size = size; parm_list.fmt = 0x02; - memset(parm_list.aci_grp, 0x40, NAME_LEN); rc = -1; diag_stat_inc(DIAG_STAT_X2FC); diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h index 5a530c552c2383ed370291b7322a962f75035e37..1d40630128a5b75a71497f00f980518292449686 100644 --- a/arch/s390/include/asm/bitops.h +++ b/arch/s390/include/asm/bitops.h @@ -387,7 +387,6 @@ static inline int fls(unsigned int word) #endif /* CONFIG_HAVE_MARCH_Z9_109_FEATURES */ #include -#include #include #include #include diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 147cb3534ce48ff56d6636a91410e81b050e8914..d74e26b48604d720b5b776057320b3390b10ee3b 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -47,8 +47,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); int __put_user_bad(void) __attribute__((noreturn)); int __get_user_bad(void) __attribute__((noreturn)); -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES - union oac { unsigned int val; struct { @@ -71,6 +69,8 @@ union oac { }; }; +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + #define __put_get_user_asm(to, from, size, oac_spec) \ ({ \ int __rc; \ diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index d52d85367bf73013230e576f17253f8c96dad6ec..b032e556eeb7124189d6de92c3cbfe14a87cc9ad 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -33,7 +33,7 @@ #define 
DEBUGP(fmt , ...) #endif -#define PLT_ENTRY_SIZE 20 +#define PLT_ENTRY_SIZE 22 void *module_alloc(unsigned long size) { @@ -341,27 +341,26 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */ case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ if (info->plt_initialized == 0) { - unsigned int insn[5]; - unsigned int *ip = me->core_layout.base + - me->arch.plt_offset + - info->plt_offset; - - insn[0] = 0x0d10e310; /* basr 1,0 */ - insn[1] = 0x100a0004; /* lg 1,10(1) */ + unsigned char insn[PLT_ENTRY_SIZE]; + char *plt_base; + char *ip; + + plt_base = me->core_layout.base + me->arch.plt_offset; + ip = plt_base + info->plt_offset; + *(int *)insn = 0x0d10e310; /* basr 1,0 */ + *(int *)&insn[4] = 0x100c0004; /* lg 1,12(1) */ if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) { - unsigned int *ij; - ij = me->core_layout.base + - me->arch.plt_offset + - me->arch.plt_size - PLT_ENTRY_SIZE; - insn[2] = 0xa7f40000 + /* j __jump_r1 */ - (unsigned int)(u16) - (((unsigned long) ij - 8 - - (unsigned long) ip) / 2); + char *jump_r1; + + jump_r1 = plt_base + me->arch.plt_size - + PLT_ENTRY_SIZE; + /* brcl 0xf,__jump_r1 */ + *(short *)&insn[8] = 0xc0f4; + *(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2; } else { - insn[2] = 0x07f10000; /* br %r1 */ + *(int *)&insn[8] = 0x07f10000; /* br %r1 */ } - insn[3] = (unsigned int) (val >> 32); - insn[4] = (unsigned int) val; + *(long *)&insn[14] = val; write(ip, insn, sizeof(insn)); info->plt_initialized = 1; diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 0c9e894913dccd614002bbc6e562683386220788..651a51914e3442ff1de32ac6216611830c57d2f0 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -264,7 +264,14 @@ static int notrace s390_validate_registers(union mci mci, int umode) /* Validate vector registers */ union ctlreg0 cr0; - if (!mci.vr) { + /* + * The vector validity must only be checked if not running a + * KVM guest. For KVM guests the machine check is forwarded by + * KVM and it is the responsibility of the guest to take + * appropriate actions. The host vector or FPU values have been + * saved by KVM and will be restored by KVM. + */ + if (!mci.vr && !test_cpu_flag(CIF_MCCK_GUEST)) { /* * Vector registers can't be restored. If the kernel * currently uses vector registers the system is @@ -307,11 +314,21 @@ static int notrace s390_validate_registers(union mci mci, int umode) if (cr2.gse) { if (!mci.gs) { /* - * Guarded storage register can't be restored and - * the current processes uses guarded storage. - * It has to be terminated. + * 2 cases: + * - machine check in kernel or userspace + * - machine check while running SIE (KVM guest) + * For kernel or userspace the userspace values of + * guarded storage control cannot be recreated, so the + * process must be terminated. + * For SIE the guest values of guarded storage cannot + * be recreated. This is either due to a bug or due to + * GS being disabled in the guest. The guest will be + * notified by KVM code and the guest's machine check + * handling must take care of this. The host values + * are saved by KVM and are not affected.
*/ - kill_task = 1; + if (!test_cpu_flag(CIF_MCCK_GUEST)) + kill_task = 1; } else { load_gs_cb((struct gs_cb *)mcesa->guarded_storage_save_area); } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9c6d45d0d3451b20aeeb660b8bafec9a6244f92c..2296b1ff1e0232ca3cb54e7f095834ab2addd555 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1990,7 +1990,7 @@ static unsigned long kvm_s390_next_dirty_cmma(struct kvm_memslots *slots, ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, ofs); while (ofs >= ms->npages && (mnode = rb_next(mnode))) { ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]); - ofs = find_next_bit(kvm_second_dirty_bitmap(ms), ms->npages, 0); + ofs = find_first_bit(kvm_second_dirty_bitmap(ms), ms->npages); } return ms->base_gfn + ofs; } @@ -4667,6 +4667,8 @@ static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu, return -EINVAL; if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) return -E2BIG; + if (!kvm_s390_pv_cpu_is_protected(vcpu)) + return -EINVAL; switch (mop->op) { case KVM_S390_MEMOP_SIDA_READ: diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 707cd4622c132d509e71e8d55a48cd2d314858fa..69feb8ed3312d406d0aaa72b6d08138c274a615b 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -17,4 +17,7 @@ KASAN_SANITIZE_uaccess.o := n obj-$(CONFIG_S390_UNWIND_SELFTEST) += test_unwind.o CFLAGS_test_unwind.o += -fno-optimize-sibling-calls +obj-$(CONFIG_S390_MODULES_SANITY_TEST) += test_modules.o +obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o + lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o diff --git a/arch/s390/lib/test_modules.c b/arch/s390/lib/test_modules.c new file mode 100644 index 0000000000000000000000000000000000000000..9894009fc1f25c78814f146ce1908a49dbfb4330 --- /dev/null +++ b/arch/s390/lib/test_modules.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include +#include + +#include "test_modules.h" + +/* + * Test that modules with many relocations are loaded properly. 
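+ * (Editorial worked example: REPEAT_10000(CALL_RETURN) expands to
+ *   result += test_modules_return_0000();
+ *   ...
+ *   result += test_modules_return_9999();
+ * and each helper is defined to return 1##i - 10000 = i, so the expected
+ * total is 0 + 1 + ... + 9999 = 9999 * 10000 / 2 = 49995000, the value
+ * asserted below.)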
+ */ +static void test_modules_many_vmlinux_relocs(struct kunit *test) +{ + int result = 0; + +#define CALL_RETURN(i) result += test_modules_return_ ## i() + REPEAT_10000(CALL_RETURN); + KUNIT_ASSERT_EQ(test, result, 49995000); +} + +static struct kunit_case modules_testcases[] = { + KUNIT_CASE(test_modules_many_vmlinux_relocs), + {} +}; + +static struct kunit_suite modules_test_suite = { + .name = "modules_test_s390", + .test_cases = modules_testcases, +}; + +kunit_test_suites(&modules_test_suite); + +MODULE_LICENSE("GPL"); diff --git a/arch/s390/lib/test_modules.h b/arch/s390/lib/test_modules.h new file mode 100644 index 0000000000000000000000000000000000000000..6371fcf1768456873fa0d8dbe7819faeef4ecf38 --- /dev/null +++ b/arch/s390/lib/test_modules.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef TEST_MODULES_H +#define TEST_MODULES_H + +#define __REPEAT_10000_3(f, x) \ + f(x ## 0); \ + f(x ## 1); \ + f(x ## 2); \ + f(x ## 3); \ + f(x ## 4); \ + f(x ## 5); \ + f(x ## 6); \ + f(x ## 7); \ + f(x ## 8); \ + f(x ## 9) +#define __REPEAT_10000_2(f, x) \ + __REPEAT_10000_3(f, x ## 0); \ + __REPEAT_10000_3(f, x ## 1); \ + __REPEAT_10000_3(f, x ## 2); \ + __REPEAT_10000_3(f, x ## 3); \ + __REPEAT_10000_3(f, x ## 4); \ + __REPEAT_10000_3(f, x ## 5); \ + __REPEAT_10000_3(f, x ## 6); \ + __REPEAT_10000_3(f, x ## 7); \ + __REPEAT_10000_3(f, x ## 8); \ + __REPEAT_10000_3(f, x ## 9) +#define __REPEAT_10000_1(f, x) \ + __REPEAT_10000_2(f, x ## 0); \ + __REPEAT_10000_2(f, x ## 1); \ + __REPEAT_10000_2(f, x ## 2); \ + __REPEAT_10000_2(f, x ## 3); \ + __REPEAT_10000_2(f, x ## 4); \ + __REPEAT_10000_2(f, x ## 5); \ + __REPEAT_10000_2(f, x ## 6); \ + __REPEAT_10000_2(f, x ## 7); \ + __REPEAT_10000_2(f, x ## 8); \ + __REPEAT_10000_2(f, x ## 9) +#define REPEAT_10000(f) \ + __REPEAT_10000_1(f, 0); \ + __REPEAT_10000_1(f, 1); \ + __REPEAT_10000_1(f, 2); \ + __REPEAT_10000_1(f, 3); \ + __REPEAT_10000_1(f, 4); \ + __REPEAT_10000_1(f, 5); \ + __REPEAT_10000_1(f, 6); \ + __REPEAT_10000_1(f, 7); \ + __REPEAT_10000_1(f, 8); \ + __REPEAT_10000_1(f, 9) + +#define DECLARE_RETURN(i) int test_modules_return_ ## i(void) +REPEAT_10000(DECLARE_RETURN); + +#endif diff --git a/arch/s390/lib/test_modules_helpers.c b/arch/s390/lib/test_modules_helpers.c new file mode 100644 index 0000000000000000000000000000000000000000..1670349a03ebabddc7fb3f57e7624f8972be4aed --- /dev/null +++ b/arch/s390/lib/test_modules_helpers.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include + +#include "test_modules.h" + +#define DEFINE_RETURN(i) \ + int test_modules_return_ ## i(void) \ + { \ + return 1 ## i - 10000; \ + } \ + EXPORT_SYMBOL_GPL(test_modules_return_ ## i) +REPEAT_10000(DEFINE_RETURN); diff --git a/arch/sh/include/asm/bitops.h b/arch/sh/include/asm/bitops.h index 3b6c7b5b7ec9f8020062eddbb0f6bd9126ae7e32..10ceb0d6b5a99730957281a74efe0400254712f3 100644 --- a/arch/sh/include/asm/bitops.h +++ b/arch/sh/include/asm/bitops.h @@ -68,6 +68,5 @@ static inline unsigned long __ffs(unsigned long word) #include #include -#include #endif /* __ASM_SH_BITOPS_H */ diff --git a/arch/sh/mm/alignment.c b/arch/sh/mm/alignment.c index fb517b82a87b1065cf38c06cb3c178ce86587b00..3a76a766f4232cbffd6071eaa0c4c9762c5c9c59 100644 --- a/arch/sh/mm/alignment.c +++ b/arch/sh/mm/alignment.c @@ -140,7 +140,7 @@ static int alignment_proc_open(struct inode *inode, struct file *file) static ssize_t alignment_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *pos) { - int *data = PDE_DATA(file_inode(file)); + 
int *data = pde_data(file_inode(file)); char mode; if (count > 0) { @@ -161,7 +161,7 @@ static const struct proc_ops alignment_proc_ops = { }; /* - * This needs to be done after sysctl_init, otherwise sys/ will be + * This needs to be done after sysctl_init_bases(), otherwise sys/ will be * overwritten. Actually, this shouldn't be in sys/ at all since * it isn't a sysctl, and it doesn't contain sysctl information. * We now locate it in /proc/cpu/alignment instead. diff --git a/arch/sparc/include/asm/bitops_32.h b/arch/sparc/include/asm/bitops_32.h index 0ceff3b915a8946b0c91f0fe9ef9886cf1df6f1e..889afa9f990ff88930b94758cc336ca2ca79da35 100644 --- a/arch/sparc/include/asm/bitops_32.h +++ b/arch/sparc/include/asm/bitops_32.h @@ -100,7 +100,6 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) #include #include #include -#include #include #include diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index ca7ea5913494f9b66991c1659fcba70f04c525be..005a8ae858f16cc9d1c8c84eace88dcd4fd0d16b 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -52,8 +52,6 @@ unsigned int __arch_hweight8(unsigned int w); #include #endif /* __KERNEL__ */ -#include - #ifdef __KERNEL__ #include diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6fddb63271d9e32d4c457f6c7ed2b6589bbc6ac2..9f5bd41bf660c7b024e6cf0838e6b7a3cf31b54e 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -137,7 +137,6 @@ config X86 select GENERIC_CPU_VULNERABILITIES select GENERIC_EARLY_IOREMAP select GENERIC_ENTRY - select GENERIC_FIND_FIRST_BIT select GENERIC_IOMAP select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP select GENERIC_IRQ_MATRIX_ALLOCATOR if X86_LOCAL_APIC @@ -187,6 +186,7 @@ config X86 select HAVE_CONTEXT_TRACKING_OFFSTACK if HAVE_CONTEXT_TRACKING select HAVE_C_RECORDMCOUNT select HAVE_OBJTOOL_MCOUNT if STACK_VALIDATION + select HAVE_BUILDTIME_MCOUNT_SORT select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE diff --git a/arch/x86/crypto/blake2s-shash.c b/arch/x86/crypto/blake2s-shash.c index f9e2fecdb761b8c75b1ea2d16eefdf15fa12edcf..59ae28abe35cc196ecf9508e9f314e117931e49c 100644 --- a/arch/x86/crypto/blake2s-shash.c +++ b/arch/x86/crypto/blake2s-shash.c @@ -18,12 +18,12 @@ static int crypto_blake2s_update_x86(struct shash_desc *desc, const u8 *in, unsigned int inlen) { - return crypto_blake2s_update(desc, in, inlen, blake2s_compress); + return crypto_blake2s_update(desc, in, inlen, false); } static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out) { - return crypto_blake2s_final(desc, out, blake2s_compress); + return crypto_blake2s_final(desc, out, false); } #define BLAKE2S_ALG(name, driver_name, digest_size) \ diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index fd9f908debe5f4f89b21c5c4735b909643c3e047..a3c7ca876aebd7243877125d31491083df09b426 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4703,6 +4703,19 @@ static __initconst const struct x86_pmu intel_pmu = { .lbr_read = intel_pmu_lbr_read_64, .lbr_save = intel_pmu_lbr_save, .lbr_restore = intel_pmu_lbr_restore, + + /* + * SMM has access to all 4 rings and while traditionally SMM code only + * ran in CPL0, 2021-era firmware is starting to make use of CPL3 in SMM. + * + * Since the EVENTSEL.{USR,OS} CPL filtering makes no distinction + * between SMM or not, this results in what should be pure userspace + * counters including SMM data. 
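+ * (Editorial gloss: EVENTSEL.OS selects counting at CPL 0 and EVENTSEL.USR
+ * at CPL > 0; neither bit encodes whether the core is executing in SMM, so
+ * SMM activity bleeds into both kinds of counts.)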
+ * + * This is a clear privilege issue, therefore globally disable + * counting SMM by default. + */ + .attr_freeze_on_smi = 1, }; static __init void intel_clovertown_quirk(void) @@ -6236,6 +6249,19 @@ __init int intel_pmu_init(void) pmu->num_counters = x86_pmu.num_counters; pmu->num_counters_fixed = x86_pmu.num_counters_fixed; } + + /* + * Quirk: For some Alder Lake machines, when all E-cores are disabled in + * a BIOS, the leaf 0xA will enumerate all counters of P-cores. However, + * the X86_FEATURE_HYBRID_CPU is still set. The above code will + * mistakenly add extra counters for P-cores. Correct the number of + * counters here. + */ + if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) { + pmu->num_counters = x86_pmu.num_counters; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed; + } + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); pmu->unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, @@ -6340,6 +6366,8 @@ __init int intel_pmu_init(void) } if (x86_pmu.lbr_nr) { + intel_pmu_lbr_init(); + pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr); /* only support branch_stack snapshot for perfmon >= v2 */ diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 8043213b75a52f7082551da414cc25b765c2f3eb..669c2be14784fc18ffcebff9ead8cae9692a8b5c 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -8,14 +8,6 @@ #include "../perf_event.h" -static const enum { - LBR_EIP_FLAGS = 1, - LBR_TSX = 2, -} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = { - [LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS, - [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX, -}; - /* * Intel LBR_SELECT bits * Intel Vol3a, April 2011, Section 16.7 Table 16-10 @@ -243,7 +235,7 @@ void intel_pmu_lbr_reset_64(void) for (i = 0; i < x86_pmu.lbr_nr; i++) { wrmsrl(x86_pmu.lbr_from + i, 0); wrmsrl(x86_pmu.lbr_to + i, 0); - if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) + if (x86_pmu.lbr_has_info) wrmsrl(x86_pmu.lbr_info + i, 0); } } @@ -305,11 +297,10 @@ enum { */ static inline bool lbr_from_signext_quirk_needed(void) { - int lbr_format = x86_pmu.intel_cap.lbr_format; bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) || boot_cpu_has(X86_FEATURE_RTM); - return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX); + return !tsx_support && x86_pmu.lbr_has_tsx; } static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key); @@ -427,12 +418,12 @@ rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info) void intel_pmu_lbr_restore(void *ctx) { - bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct x86_perf_task_context *task_ctx = ctx; - int i; - unsigned lbr_idx, mask; + bool need_info = x86_pmu.lbr_has_info; u64 tos = task_ctx->tos; + unsigned lbr_idx, mask; + int i; mask = x86_pmu.lbr_nr - 1; for (i = 0; i < task_ctx->valid_lbrs; i++) { @@ -444,7 +435,7 @@ void intel_pmu_lbr_restore(void *ctx) lbr_idx = (tos - i) & mask; wrlbr_from(lbr_idx, 0); wrlbr_to(lbr_idx, 0); - if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO) + if (need_info) wrlbr_info(lbr_idx, 0); } @@ -519,9 +510,9 @@ static void __intel_pmu_lbr_restore(void *ctx) void intel_pmu_lbr_save(void *ctx) { - bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); struct x86_perf_task_context *task_ctx = ctx; + bool need_info = x86_pmu.lbr_has_info; unsigned lbr_idx, mask; u64 tos; int i; @@ -816,7 +807,6 @@ void
intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) { bool need_info = false, call_stack = false; unsigned long mask = x86_pmu.lbr_nr - 1; - int lbr_format = x86_pmu.intel_cap.lbr_format; u64 tos = intel_pmu_lbr_tos(); int i; int out = 0; @@ -831,9 +821,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) for (i = 0; i < num; i++) { unsigned long lbr_idx = (tos - i) & mask; u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0; - int skip = 0; u16 cycles = 0; - int lbr_flags = lbr_desc[lbr_format]; from = rdlbr_from(lbr_idx, NULL); to = rdlbr_to(lbr_idx, NULL); @@ -845,37 +833,39 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) if (call_stack && !from) break; - if (lbr_format == LBR_FORMAT_INFO && need_info) { - u64 info; - - info = rdlbr_info(lbr_idx, NULL); - mis = !!(info & LBR_INFO_MISPRED); - pred = !mis; - in_tx = !!(info & LBR_INFO_IN_TX); - abort = !!(info & LBR_INFO_ABORT); - cycles = (info & LBR_INFO_CYCLES); - } - - if (lbr_format == LBR_FORMAT_TIME) { - mis = !!(from & LBR_FROM_FLAG_MISPRED); - pred = !mis; - skip = 1; - cycles = ((to >> 48) & LBR_INFO_CYCLES); - - to = (u64)((((s64)to) << 16) >> 16); - } - - if (lbr_flags & LBR_EIP_FLAGS) { - mis = !!(from & LBR_FROM_FLAG_MISPRED); - pred = !mis; - skip = 1; - } - if (lbr_flags & LBR_TSX) { - in_tx = !!(from & LBR_FROM_FLAG_IN_TX); - abort = !!(from & LBR_FROM_FLAG_ABORT); - skip = 3; + if (x86_pmu.lbr_has_info) { + if (need_info) { + u64 info; + + info = rdlbr_info(lbr_idx, NULL); + mis = !!(info & LBR_INFO_MISPRED); + pred = !mis; + cycles = (info & LBR_INFO_CYCLES); + if (x86_pmu.lbr_has_tsx) { + in_tx = !!(info & LBR_INFO_IN_TX); + abort = !!(info & LBR_INFO_ABORT); + } + } + } else { + int skip = 0; + + if (x86_pmu.lbr_from_flags) { + mis = !!(from & LBR_FROM_FLAG_MISPRED); + pred = !mis; + skip = 1; + } + if (x86_pmu.lbr_has_tsx) { + in_tx = !!(from & LBR_FROM_FLAG_IN_TX); + abort = !!(from & LBR_FROM_FLAG_ABORT); + skip = 3; + } + from = (u64)((((s64)from) << skip) >> skip); + + if (x86_pmu.lbr_to_cycles) { + cycles = ((to >> 48) & LBR_INFO_CYCLES); + to = (u64)((((s64)to) << 16) >> 16); + } } - from = (u64)((((s64)from) << skip) >> skip); /* * Some CPUs report duplicated abort records, @@ -903,37 +893,40 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) cpuc->lbr_stack.hw_idx = tos; } +static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred); +static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles); +static DEFINE_STATIC_KEY_FALSE(x86_lbr_type); + static __always_inline int get_lbr_br_type(u64 info) { - if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type) - return 0; + int type = 0; - return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET; + if (static_branch_likely(&x86_lbr_type)) + type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET; + + return type; } static __always_inline bool get_lbr_mispred(u64 info) { - if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred) - return 0; + bool mispred = 0; - return !!(info & LBR_INFO_MISPRED); -} + if (static_branch_likely(&x86_lbr_mispred)) + mispred = !!(info & LBR_INFO_MISPRED); -static __always_inline bool get_lbr_predicted(u64 info) -{ - if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred) - return 0; - - return !(info & LBR_INFO_MISPRED); + return mispred; } static __always_inline u16 get_lbr_cycles(u64 info) { + u16 cycles = info & LBR_INFO_CYCLES; + if (static_cpu_has(X86_FEATURE_ARCH_LBR) && - !(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID)) - return 0; + (!static_branch_likely(&x86_lbr_cycles) || + !(info 
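/* (editorial: i.e. drop the cycle count unless timed LBRs are
 * enabled and this entry flags its count as valid) */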
& LBR_INFO_CYC_CNT_VALID))) + cycles = 0; - return info & LBR_INFO_CYCLES; + return cycles; } static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc, @@ -961,7 +954,7 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc, e->from = from; e->to = to; e->mispred = get_lbr_mispred(info); - e->predicted = get_lbr_predicted(info); + e->predicted = !e->mispred; e->in_tx = !!(info & LBR_INFO_IN_TX); e->abort = !!(info & LBR_INFO_ABORT); e->cycles = get_lbr_cycles(info); @@ -1120,7 +1113,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) && (br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) && - (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)) + x86_pmu.lbr_has_info) reg->config |= LBR_NO_INFO; return 0; @@ -1706,6 +1699,38 @@ void intel_pmu_lbr_init_knl(void) x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS; } +void intel_pmu_lbr_init(void) +{ + switch (x86_pmu.intel_cap.lbr_format) { + case LBR_FORMAT_EIP_FLAGS2: + x86_pmu.lbr_has_tsx = 1; + fallthrough; + case LBR_FORMAT_EIP_FLAGS: + x86_pmu.lbr_from_flags = 1; + break; + + case LBR_FORMAT_INFO: + x86_pmu.lbr_has_tsx = 1; + fallthrough; + case LBR_FORMAT_INFO2: + x86_pmu.lbr_has_info = 1; + break; + + case LBR_FORMAT_TIME: + x86_pmu.lbr_from_flags = 1; + x86_pmu.lbr_to_cycles = 1; + break; + } + + if (x86_pmu.lbr_has_info) { + /* + * Only used in combination with baseline pebs. + */ + static_branch_enable(&x86_lbr_mispred); + static_branch_enable(&x86_lbr_cycles); + } +} + /* * LBR state size is variable based on the max number of registers. * This calculates the expected state size, which should match @@ -1726,6 +1751,9 @@ static bool is_arch_lbr_xsave_available(void) * Check the LBR state with the corresponding software structure. * Disable LBR XSAVES support if the size doesn't match. */ + if (xfeature_size(XFEATURE_LBR) == 0) + return false; + if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size())) return false; @@ -1765,6 +1793,12 @@ void __init intel_pmu_arch_lbr_init(void) x86_pmu.lbr_br_type = ecx.split.lbr_br_type; x86_pmu.lbr_nr = lbr_nr; + if (x86_pmu.lbr_mispred) + static_branch_enable(&x86_lbr_mispred); + if (x86_pmu.lbr_timed_lbr) + static_branch_enable(&x86_lbr_cycles); + if (x86_pmu.lbr_br_type) + static_branch_enable(&x86_lbr_type); arch_lbr_xsave = is_arch_lbr_xsave_available(); if (arch_lbr_xsave) { diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 7f406c14715fde33c4e813c1605b16eed3c79990..2d33bba9a144048f298053714ab59bfc074e6ae8 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -897,8 +897,9 @@ static void pt_handle_status(struct pt *pt) * means we are already losing data; need to let the decoder * know. 
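 * (Editorial note: single-range output has no ToPA regions, so the
 * region-size accounting below does not apply to it -- hence the new
 * !buf->single condition.)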
*/ - if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) || - buf->output_off == pt_buffer_region_size(buf)) { + if (!buf->single && + (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) || + buf->output_off == pt_buffer_region_size(buf))) { perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_TRUNCATED); advance++; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index f1ba6ab2e97ef53c54bab9e3824a6b6a925edcd9..e497da9bf42707b654dc00d94ad5c998927918b2 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1762,7 +1762,7 @@ static const struct intel_uncore_init_fun rkl_uncore_init __initconst = { static const struct intel_uncore_init_fun adl_uncore_init __initconst = { .cpu_init = adl_uncore_cpu_init, - .mmio_init = tgl_uncore_mmio_init, + .mmio_init = adl_uncore_mmio_init, }; static const struct intel_uncore_init_fun icx_uncore_init __initconst = { diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index b9687980aab6d3d897a3423db71f30c6596167f9..2adeaf4de4df6f5bdf5fd843d233a9db72f89154 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -584,10 +584,11 @@ void snb_uncore_cpu_init(void); void nhm_uncore_cpu_init(void); void skl_uncore_cpu_init(void); void icl_uncore_cpu_init(void); -void adl_uncore_cpu_init(void); void tgl_uncore_cpu_init(void); +void adl_uncore_cpu_init(void); void tgl_uncore_mmio_init(void); void tgl_l_uncore_mmio_init(void); +void adl_uncore_mmio_init(void); int snb_pci2phy_map_init(int devid); /* uncore_snbep.c */ diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 3049c646fa209f4c962c5126f2253a8ebe74c092..6ddadb482f68d6cb3f56103fb045afbf5ff0d487 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -494,8 +494,8 @@ void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box) writel(0, box->io_addr); } -static void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box, - struct perf_event *event) +void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h index 6d735611c281c1d4a3eccbb8aa652791247f6941..cfaf558bdb6b3b6e3e54a73c9df86c0ded96aac0 100644 --- a/arch/x86/events/intel/uncore_discovery.h +++ b/arch/x86/events/intel/uncore_discovery.h @@ -139,6 +139,8 @@ void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box); void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box); void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box, struct perf_event *event); +void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box, + struct perf_event *event); void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box); void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box); diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 0f63706cdadfca52abdaf5e6e28fdd32bf361073..f698a55bde8189884793e40481f060e132eef42b 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Nehalem/SandBridge/Haswell/Broadwell/Skylake uncore support */ #include "uncore.h" +#include "uncore_discovery.h" /* Uncore IMC PCI IDs */ #define PCI_DEVICE_ID_INTEL_SNB_IMC 0x0100 @@ -64,6 
+65,20 @@ #define PCI_DEVICE_ID_INTEL_RKL_2_IMC 0x4c53 #define PCI_DEVICE_ID_INTEL_ADL_1_IMC 0x4660 #define PCI_DEVICE_ID_INTEL_ADL_2_IMC 0x4641 +#define PCI_DEVICE_ID_INTEL_ADL_3_IMC 0x4601 +#define PCI_DEVICE_ID_INTEL_ADL_4_IMC 0x4602 +#define PCI_DEVICE_ID_INTEL_ADL_5_IMC 0x4609 +#define PCI_DEVICE_ID_INTEL_ADL_6_IMC 0x460a +#define PCI_DEVICE_ID_INTEL_ADL_7_IMC 0x4621 +#define PCI_DEVICE_ID_INTEL_ADL_8_IMC 0x4623 +#define PCI_DEVICE_ID_INTEL_ADL_9_IMC 0x4629 +#define PCI_DEVICE_ID_INTEL_ADL_10_IMC 0x4637 +#define PCI_DEVICE_ID_INTEL_ADL_11_IMC 0x463b +#define PCI_DEVICE_ID_INTEL_ADL_12_IMC 0x4648 +#define PCI_DEVICE_ID_INTEL_ADL_13_IMC 0x4649 +#define PCI_DEVICE_ID_INTEL_ADL_14_IMC 0x4650 +#define PCI_DEVICE_ID_INTEL_ADL_15_IMC 0x4668 +#define PCI_DEVICE_ID_INTEL_ADL_16_IMC 0x4670 /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -155,6 +170,7 @@ DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15"); +DEFINE_UNCORE_FORMAT_ATTR(chmask, chmask, "config:8-11"); DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18"); DEFINE_UNCORE_FORMAT_ATTR(inv, inv, "config:23"); DEFINE_UNCORE_FORMAT_ATTR(cmask5, cmask, "config:24-28"); @@ -1334,6 +1350,62 @@ static const struct pci_device_id tgl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_2_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_3_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_4_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_5_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_6_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_7_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_8_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_9_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_10_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_11_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_12_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_13_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_14_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_15_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ADL_16_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ } }; @@ -1390,7 +1462,8 @@ static struct pci_dev 
*tgl_uncore_get_mc_dev(void) #define TGL_UNCORE_MMIO_IMC_MEM_OFFSET 0x10000 #define TGL_UNCORE_PCI_IMC_MAP_SIZE 0xe000 -static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +static void __uncore_imc_init_box(struct intel_uncore_box *box, + unsigned int base_offset) { struct pci_dev *pdev = tgl_uncore_get_mc_dev(); struct intel_uncore_pmu *pmu = box->pmu; @@ -1417,11 +1490,17 @@ static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) addr |= ((resource_size_t)mch_bar << 32); #endif + addr += base_offset; box->io_addr = ioremap(addr, type->mmio_map_size); if (!box->io_addr) pr_warn("perf uncore: Failed to ioremap for %s.\n", type->name); } +static void tgl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + __uncore_imc_init_box(box, 0); +} + static struct intel_uncore_ops tgl_uncore_imc_freerunning_ops = { .init_box = tgl_uncore_imc_freerunning_init_box, .exit_box = uncore_mmio_exit_box, @@ -1469,3 +1548,136 @@ void tgl_uncore_mmio_init(void) } /* end of Tiger Lake MMIO uncore support */ + +/* Alder Lake MMIO uncore support */ +#define ADL_UNCORE_IMC_BASE 0xd900 +#define ADL_UNCORE_IMC_MAP_SIZE 0x200 +#define ADL_UNCORE_IMC_CTR 0xe8 +#define ADL_UNCORE_IMC_CTRL 0xd0 +#define ADL_UNCORE_IMC_GLOBAL_CTL 0xc0 +#define ADL_UNCORE_IMC_BOX_CTL 0xc4 +#define ADL_UNCORE_IMC_FREERUNNING_BASE 0xd800 +#define ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE 0x100 + +#define ADL_UNCORE_IMC_CTL_FRZ (1 << 0) +#define ADL_UNCORE_IMC_CTL_RST_CTRL (1 << 1) +#define ADL_UNCORE_IMC_CTL_RST_CTRS (1 << 2) +#define ADL_UNCORE_IMC_CTL_INT (ADL_UNCORE_IMC_CTL_RST_CTRL | \ + ADL_UNCORE_IMC_CTL_RST_CTRS) + +static void adl_uncore_imc_init_box(struct intel_uncore_box *box) +{ + __uncore_imc_init_box(box, ADL_UNCORE_IMC_BASE); + + /* The global control in MC1 can control both MCs. 
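+ * (Editorial note: that is why the counter/control reset below is issued
+ * only from the box with pmu_idx == 1.)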
*/ + if (box->io_addr && (box->pmu->pmu_idx == 1)) + writel(ADL_UNCORE_IMC_CTL_INT, box->io_addr + ADL_UNCORE_IMC_GLOBAL_CTL); +} + +static void adl_uncore_mmio_disable_box(struct intel_uncore_box *box) +{ + if (!box->io_addr) + return; + + writel(ADL_UNCORE_IMC_CTL_FRZ, box->io_addr + uncore_mmio_box_ctl(box)); +} + +static void adl_uncore_mmio_enable_box(struct intel_uncore_box *box) +{ + if (!box->io_addr) + return; + + writel(0, box->io_addr + uncore_mmio_box_ctl(box)); +} + +static struct intel_uncore_ops adl_uncore_mmio_ops = { + .init_box = adl_uncore_imc_init_box, + .exit_box = uncore_mmio_exit_box, + .disable_box = adl_uncore_mmio_disable_box, + .enable_box = adl_uncore_mmio_enable_box, + .disable_event = intel_generic_uncore_mmio_disable_event, + .enable_event = intel_generic_uncore_mmio_enable_event, + .read_counter = uncore_mmio_read_counter, +}; + +#define ADL_UNC_CTL_CHMASK_MASK 0x00000f00 +#define ADL_UNC_IMC_EVENT_MASK (SNB_UNC_CTL_EV_SEL_MASK | \ + ADL_UNC_CTL_CHMASK_MASK | \ + SNB_UNC_CTL_EDGE_DET) + +static struct attribute *adl_uncore_imc_formats_attr[] = { + &format_attr_event.attr, + &format_attr_chmask.attr, + &format_attr_edge.attr, + NULL, +}; + +static const struct attribute_group adl_uncore_imc_format_group = { + .name = "format", + .attrs = adl_uncore_imc_formats_attr, +}; + +static struct intel_uncore_type adl_uncore_imc = { + .name = "imc", + .num_counters = 5, + .num_boxes = 2, + .perf_ctr_bits = 64, + .perf_ctr = ADL_UNCORE_IMC_CTR, + .event_ctl = ADL_UNCORE_IMC_CTRL, + .event_mask = ADL_UNC_IMC_EVENT_MASK, + .box_ctl = ADL_UNCORE_IMC_BOX_CTL, + .mmio_offset = 0, + .mmio_map_size = ADL_UNCORE_IMC_MAP_SIZE, + .ops = &adl_uncore_mmio_ops, + .format_group = &adl_uncore_imc_format_group, +}; + +enum perf_adl_uncore_imc_freerunning_types { + ADL_MMIO_UNCORE_IMC_DATA_TOTAL, + ADL_MMIO_UNCORE_IMC_DATA_READ, + ADL_MMIO_UNCORE_IMC_DATA_WRITE, + ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX +}; + +static struct freerunning_counters adl_uncore_imc_freerunning[] = { + [ADL_MMIO_UNCORE_IMC_DATA_TOTAL] = { 0x40, 0x0, 0x0, 1, 64 }, + [ADL_MMIO_UNCORE_IMC_DATA_READ] = { 0x58, 0x0, 0x0, 1, 64 }, + [ADL_MMIO_UNCORE_IMC_DATA_WRITE] = { 0xA0, 0x0, 0x0, 1, 64 }, +}; + +static void adl_uncore_imc_freerunning_init_box(struct intel_uncore_box *box) +{ + __uncore_imc_init_box(box, ADL_UNCORE_IMC_FREERUNNING_BASE); +} + +static struct intel_uncore_ops adl_uncore_imc_freerunning_ops = { + .init_box = adl_uncore_imc_freerunning_init_box, + .exit_box = uncore_mmio_exit_box, + .read_counter = uncore_mmio_read_counter, + .hw_config = uncore_freerunning_hw_config, +}; + +static struct intel_uncore_type adl_uncore_imc_free_running = { + .name = "imc_free_running", + .num_counters = 3, + .num_boxes = 2, + .num_freerunning_types = ADL_MMIO_UNCORE_IMC_FREERUNNING_TYPE_MAX, + .mmio_map_size = ADL_UNCORE_IMC_FREERUNNING_MAP_SIZE, + .freerunning = adl_uncore_imc_freerunning, + .ops = &adl_uncore_imc_freerunning_ops, + .event_descs = tgl_uncore_imc_events, + .format_group = &tgl_uncore_imc_format_group, +}; + +static struct intel_uncore_type *adl_mmio_uncores[] = { + &adl_uncore_imc, + &adl_uncore_imc_free_running, + NULL +}; + +void adl_uncore_mmio_init(void) +{ + uncore_mmio_uncores = adl_mmio_uncores; +} + +/* end of Alder Lake MMIO uncore support */ diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 3660f698fb2aa1dc563565c244c2082e2b32c8a1..ed869443efb21dc91ce5a0e8e30e4c0db2487069 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ 
b/arch/x86/events/intel/uncore_snbep.c @@ -5482,7 +5482,7 @@ static struct intel_uncore_type icx_uncore_imc = { .fixed_ctr_bits = 48, .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, - .event_descs = hswep_uncore_imc_events, + .event_descs = snr_uncore_imc_events, .perf_ctr = SNR_IMC_MMIO_PMON_CTR0, .event_ctl = SNR_IMC_MMIO_PMON_CTL0, .event_mask = SNBEP_PMON_RAW_EVENT_MASK, diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 9d376e528dfcda3845f15060d1d2a2ab1c75f1a3..150261d929b9e945bb4aa7b52f8b664dcbe20daa 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -215,7 +215,8 @@ enum { LBR_FORMAT_EIP_FLAGS2 = 0x04, LBR_FORMAT_INFO = 0x05, LBR_FORMAT_TIME = 0x06, - LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_TIME, + LBR_FORMAT_INFO2 = 0x07, + LBR_FORMAT_MAX_KNOWN = LBR_FORMAT_INFO2, }; enum { @@ -840,6 +841,11 @@ struct x86_pmu { bool lbr_double_abort; /* duplicated lbr aborts */ bool lbr_pt_coexist; /* (LBR|BTS) may coexist with PT */ + unsigned int lbr_has_info:1; + unsigned int lbr_has_tsx:1; + unsigned int lbr_from_flags:1; + unsigned int lbr_to_cycles:1; + /* * Intel Architectural LBR CPUID Enumeration */ @@ -1392,6 +1398,8 @@ void intel_pmu_lbr_init_skl(void); void intel_pmu_lbr_init_knl(void); +void intel_pmu_lbr_init(void); + void intel_pmu_arch_lbr_init(void); void intel_pmu_pebs_data_source_nhm(void); diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 85feafacc445dc169e555d4e5f78f78ed3a45813..77e3a47af5ad5ed06ae70308683df27a20beccbb 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -536,11 +536,14 @@ static struct perf_msr intel_rapl_spr_msrs[] = { * - perf_msr_probe(PERF_RAPL_MAX) * - want to use same event codes across both architectures */ -static struct perf_msr amd_rapl_msrs[PERF_RAPL_MAX] = { - [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr }, +static struct perf_msr amd_rapl_msrs[] = { + [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, 0, false, 0 }, + [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, + [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, 0, false, 0 }, + [PERF_RAPL_PP1] = { 0, &rapl_events_gpu_group, 0, false, 0 }, + [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, 0, false, 0 }, }; - static int rapl_cpu_offline(unsigned int cpu) { struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 0367efdc5b7a8eaf724949c840c6ec3f8ceaf643..a288ecd230ab06ab495f4a393001ebf3167ba077 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -380,8 +380,6 @@ static __always_inline int fls64(__u64 x) #include #endif -#include - #include #include diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 84b87538a15de6e863ed7f9a8e17caf5d5233b05..bab883c0b6fee0350281b7b62d3b52ba75056108 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h @@ -22,7 +22,7 @@ #ifdef CONFIG_DEBUG_BUGVERBOSE -#define _BUG_FLAGS(ins, flags) \ +#define _BUG_FLAGS(ins, flags, extra) \ do { \ asm_inline volatile("1:\t" ins "\n" \ ".pushsection __bug_table,\"aw\"\n" \ @@ -31,7 +31,8 @@ do { \ "\t.word %c1" "\t# bug_entry::line\n" \ "\t.word %c2" "\t# bug_entry::flags\n" \ "\t.org 2b+%c3\n" \ - ".popsection" \ + ".popsection\n" \ + extra \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (flags), \ "i" (sizeof(struct bug_entry))); \ @@ -39,14 +40,15 @@ do { \ #else /* !CONFIG_DEBUG_BUGVERBOSE 
*/ -#define _BUG_FLAGS(ins, flags) \ +#define _BUG_FLAGS(ins, flags, extra) \ do { \ asm_inline volatile("1:\t" ins "\n" \ ".pushsection __bug_table,\"aw\"\n" \ "2:\t" __BUG_REL(1b) "\t# bug_entry::bug_addr\n" \ "\t.word %c0" "\t# bug_entry::flags\n" \ "\t.org 2b+%c1\n" \ - ".popsection" \ + ".popsection\n" \ + extra \ : : "i" (flags), \ "i" (sizeof(struct bug_entry))); \ } while (0) @@ -55,7 +57,7 @@ do { \ #else -#define _BUG_FLAGS(ins, flags) asm volatile(ins) +#define _BUG_FLAGS(ins, flags, extra) asm volatile(ins) #endif /* CONFIG_GENERIC_BUG */ @@ -63,8 +65,8 @@ do { \ #define BUG() \ do { \ instrumentation_begin(); \ - _BUG_FLAGS(ASM_UD2, 0); \ - unreachable(); \ + _BUG_FLAGS(ASM_UD2, 0, ""); \ + __builtin_unreachable(); \ } while (0) /* @@ -75,9 +77,9 @@ do { \ */ #define __WARN_FLAGS(flags) \ do { \ + __auto_type f = BUGFLAG_WARNING|(flags); \ instrumentation_begin(); \ - _BUG_FLAGS(ASM_UD2, BUGFLAG_WARNING|(flags)); \ - annotate_reachable(); \ + _BUG_FLAGS(ASM_UD2, f, ASM_REACHABLE); \ instrumentation_end(); \ } while (0) diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index f658bb4dbb74dfe1a0233b533c2ea9086b9f43a3..d39e0de06be2304e35bdfdd39243d64dc24076d6 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -55,6 +55,7 @@ KVM_X86_OP_NULL(tlb_remote_flush) KVM_X86_OP_NULL(tlb_remote_flush_with_range) KVM_X86_OP(tlb_flush_gva) KVM_X86_OP(tlb_flush_guest) +KVM_X86_OP(vcpu_pre_run) KVM_X86_OP(run) KVM_X86_OP_NULL(handle_exit) KVM_X86_OP_NULL(skip_emulated_instruction) @@ -81,7 +82,7 @@ KVM_X86_OP_NULL(guest_apic_has_interrupt) KVM_X86_OP(load_eoi_exitmap) KVM_X86_OP(set_virtual_apic_mode) KVM_X86_OP_NULL(set_apic_access_page_addr) -KVM_X86_OP(deliver_posted_interrupt) +KVM_X86_OP(deliver_interrupt) KVM_X86_OP_NULL(sync_pir_to_irr) KVM_X86_OP(set_tss_addr) KVM_X86_OP(set_identity_map_addr) @@ -98,8 +99,6 @@ KVM_X86_OP(handle_exit_irqoff) KVM_X86_OP_NULL(request_immediate_exit) KVM_X86_OP(sched_in) KVM_X86_OP_NULL(update_cpu_dirty_logging) -KVM_X86_OP_NULL(pre_block) -KVM_X86_OP_NULL(post_block) KVM_X86_OP_NULL(vcpu_blocking) KVM_X86_OP_NULL(vcpu_unblocking) KVM_X86_OP_NULL(update_pi_irte) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0677b9ea01c901c68ea763e7330d904701fa8617..6dcccb304775411a8f38738526c79a5aa670c820 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1381,6 +1381,7 @@ struct kvm_x86_ops { */ void (*tlb_flush_guest)(struct kvm_vcpu *vcpu); + int (*vcpu_pre_run)(struct kvm_vcpu *vcpu); enum exit_fastpath_completion (*run)(struct kvm_vcpu *vcpu); int (*handle_exit)(struct kvm_vcpu *vcpu, enum exit_fastpath_completion exit_fastpath); @@ -1409,7 +1410,8 @@ struct kvm_x86_ops { void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu); - int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); + void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode, + int trig_mode, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); @@ -1454,18 +1456,6 @@ struct kvm_x86_ops { const struct kvm_pmu_ops *pmu_ops; const struct kvm_x86_nested_ops *nested_ops; - /* - * Architecture specific hooks for vCPU blocking due to - * HLT instruction. 
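The _BUG_FLAGS() rework above threads an extra asm string through the macro so __WARN_FLAGS() can emit ASM_REACHABLE inside the same asm statement as the ud2. The underlying technique, recording per-site metadata in a dedicated ELF section via .pushsection, can be shown in isolation; in this hedged userspace sketch the section name my_sites and the record layout are inventions of the example, not the kernel's __bug_table format.

/* Each RECORD_SITE() stores the address of its local label in a custom
 * allocated section; the linker auto-generates __start/__stop bounds for
 * any section whose name is a valid C identifier. */
#include <stdio.h>

#define RECORD_SITE()                                           \
        asm volatile("1:\n\t"                                   \
                     ".pushsection my_sites,\"aw\"\n\t"         \
                     ".quad 1b\n\t"                             \
                     ".popsection")

extern const unsigned long __start_my_sites[];
extern const unsigned long __stop_my_sites[];

int main(void)
{
        const unsigned long *p;

        RECORD_SITE();
        RECORD_SITE();

        for (p = __start_my_sites; p < __stop_my_sites; p++)
                printf("recorded site at %#lx\n", *p);
        return 0;
}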
- * Returns for .pre_block(): - * - 0 means continue to block the vCPU. - * - 1 means we cannot block the vCPU since some event - * happens during this period, such as, 'ON' bit in - * posted-interrupts descriptor is set. - */ - int (*pre_block)(struct kvm_vcpu *vcpu); - void (*post_block)(struct kvm_vcpu *vcpu); - void (*vcpu_blocking)(struct kvm_vcpu *vcpu); void (*vcpu_unblocking)(struct kvm_vcpu *vcpu); @@ -1494,7 +1484,8 @@ struct kvm_x86_ops { int (*get_msr_feature)(struct kvm_msr_entry *entry); - bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, void *insn, int insn_len); + bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len); bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); @@ -1507,6 +1498,7 @@ struct kvm_x86_ops { }; struct kvm_x86_nested_ops { + void (*leave_nested)(struct kvm_vcpu *vcpu); int (*check_events)(struct kvm_vcpu *vcpu); bool (*hv_timer_pending)(struct kvm_vcpu *vcpu); void (*triple_fault)(struct kvm_vcpu *vcpu); @@ -1872,7 +1864,6 @@ int kvm_cpu_has_extint(struct kvm_vcpu *v); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); int kvm_cpu_get_interrupt(struct kvm_vcpu *v); void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); -void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu); int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, unsigned long ipi_bitmap_high, u32 min, diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h index a9630104f1c4fb7eb50ef118356ca350288d7365..78e667a31d6c463bf910d9a19e0b139935bafb45 100644 --- a/arch/x86/include/asm/xen/cpuid.h +++ b/arch/x86/include/asm/xen/cpuid.h @@ -100,6 +100,13 @@ /* Memory mapped from other domains has valid IOMMU entries */ #define XEN_HVM_CPUID_IOMMU_MAPPINGS (1u << 2) #define XEN_HVM_CPUID_VCPU_ID_PRESENT (1u << 3) /* vcpu id is present in EBX */ +#define XEN_HVM_CPUID_DOMID_PRESENT (1u << 4) /* domid is present in ECX */ +/* + * Bits 55:49 from the IO-APIC RTE and bits 11:5 from the MSI address can be + * used to store high bits for the Destination ID. This expands the Destination + * ID field from 8 to 15 bits, allowing to target APIC IDs up to 32768.
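As a hedged illustration of the bit layout that XEN_HVM_CPUID_EXT_DEST_ID advertises (this is example code, not the kernel's MSI composition path): a 15-bit destination ID splits into the legacy 8-bit field at address bits 19:12 and the extended high bits at 11:5, exactly as the comment above describes.

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

static uint32_t msi_addr_for_dest(uint16_t dest)
{
        uint32_t addr = 0xfee00000;             /* fixed MSI address base */

        assert(dest < (1u << 15));              /* 15-bit destination ID */
        addr |= (uint32_t)(dest & 0xff) << 12;  /* legacy ID bits 7:0 -> 19:12 */
        addr |= (uint32_t)(dest >> 8) << 5;     /* extended bits 14:8 -> 11:5 */
        return addr;
}

int main(void)
{
        printf("dest 300 -> MSI addr %#x\n", msi_addr_for_dest(300));
        return 0;
}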
+ */ +#define XEN_HVM_CPUID_EXT_DEST_ID (1u << 5) /* * Leaf 6 (0x40000x05) diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 1bf2ad34188ada698c150304111ba7fce59b5416..16f548a661cf6d68b3516bf8226114d9ab4f6d8d 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -43,20 +43,6 @@ static inline uint32_t xen_cpuid_base(void) return hypervisor_cpuid_base("XenVMMXenVMM", 2); } -#ifdef CONFIG_XEN -extern bool __init xen_hvm_need_lapic(void); - -static inline bool __init xen_x2apic_para_available(void) -{ - return xen_hvm_need_lapic(); -} -#else -static inline bool __init xen_x2apic_para_available(void) -{ - return (xen_cpuid_base() != 0); -} -#endif - struct pci_dev; #ifdef CONFIG_XEN_PV_DOM0 diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 2da3316bb55959dad6b400e70b06628c4de0201b..bf6e96011dfed4e2e8582e511c08b5b6cbd723b6 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -452,6 +452,9 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 +/* attributes for system fd (group 0) */ +#define KVM_X86_XCOMP_GUEST_SUPP 0 + struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index c132daabe6152282b042ece88fd6a586c1f45085..3e6f6b448f6aaa4eb19b1976162f05cd30b094cf 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -760,9 +760,9 @@ void __init lapic_update_legacy_vectors(void) void __init lapic_assign_system_vectors(void) { - unsigned int i, vector = 0; + unsigned int i, vector; - for_each_set_bit_from(vector, system_vectors, NR_VECTORS) + for_each_set_bit(vector, system_vectors, NR_VECTORS) irq_matrix_assign_system(vector_matrix, vector, false); if (nr_legacy_irqs() > 1) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index a1e2f41796dc15a6fb1d30b00cc7c90ac54d4981..9f4b508886dde47d90e54adcbc7f7ec5556540a8 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -423,7 +423,7 @@ static void threshold_restart_bank(void *_tr) u32 hi, lo; /* sysfs write might race against an offline operation */ - if (this_cpu_read(threshold_banks)) + if (!this_cpu_read(threshold_banks) && !tr->set_lvt_off) return; rdmsr(tr->b->address, lo, hi); diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index bb9a46a804bf214afc571e4bcd426035ffcdd546..baafbb37be678dfdf1fe148141c65b93611fd137 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -486,6 +486,7 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) case INTEL_FAM6_BROADWELL_X: case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_ICELAKE_D: case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_XEON_PHI_KNL: case INTEL_FAM6_XEON_PHI_KNM: diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c index 001808e3901ccac3b3fb0a142d14615299be42cd..48afe96ae0f0f5e6a1ed1fc2850861dc10c3ea8f 100644 --- a/arch/x86/kernel/cpu/sgx/encl.c +++ b/arch/x86/kernel/cpu/sgx/encl.c @@ -410,6 +410,8 @@ void sgx_encl_release(struct kref *ref) } kfree(entry); + /* Invoke scheduler to prevent soft lockups. 
*/ + cond_resched(); } xa_destroy(&encl->page_array); diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c index 9ae64f9af956896cffc749d57009a1ba8e9bd46d..9b9fb7882c206c46e164d5d772cebda4b16dc540 100644 --- a/arch/x86/kernel/resource.c +++ b/arch/x86/kernel/resource.c @@ -1,5 +1,4 @@ // SPDX-License-Identifier: GPL-2.0 -#include #include #include @@ -24,31 +23,11 @@ static void resource_clip(struct resource *res, resource_size_t start, res->start = end + 1; } -/* - * Some BIOS-es contain a bug where they add addresses which map to - * system RAM in the PCI host bridge window returned by the ACPI _CRS - * method, see commit 4dc2287c1805 ("x86: avoid E820 regions when - * allocating address space"). To avoid this Linux by default excludes - * E820 reservations when allocating addresses since 2010. - * In 2019 some systems have shown-up with E820 reservations which cover - * the entire _CRS returned PCI host bridge window, causing all attempts - * to assign memory to PCI BARs to fail if Linux uses E820 reservations. - * - * Ideally Linux would fully stop using E820 reservations, but then - * the old systems this was added for will regress. - * Instead keep the old behavior for old systems, while ignoring the - * E820 reservations for any systems from now on. - */ static void remove_e820_regions(struct resource *avail) { - int i, year = dmi_get_bios_year(); + int i; struct e820_entry *entry; - if (year >= 2018) - return; - - pr_info_once("PCI: Removing E820 reservations from host bridge windows\n"); - for (i = 0; i < e820_table->nr_entries; i++) { entry = &e820_table->entries[i]; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c55e57b30e81585fd78d4180f51bdb3f2ce0b67c..494d4d3518597f28676ad2cf36f2737234e01572 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -119,6 +119,29 @@ static int kvm_check_cpuid(struct kvm_vcpu *vcpu, return fpu_enable_guest_xfd_features(&vcpu->arch.guest_fpu, xfeatures); } +/* Check whether the supplied CPUID data is equal to what is already set for the vCPU. 
*/ +static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, + int nent) +{ + struct kvm_cpuid_entry2 *orig; + int i; + + if (nent != vcpu->arch.cpuid_nent) + return -EINVAL; + + for (i = 0; i < nent; i++) { + orig = &vcpu->arch.cpuid_entries[i]; + if (e2[i].function != orig->function || + e2[i].index != orig->index || + e2[i].flags != orig->flags || + e2[i].eax != orig->eax || e2[i].ebx != orig->ebx || + e2[i].ecx != orig->ecx || e2[i].edx != orig->edx) + return -EINVAL; + } + + return 0; +} + static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu) { u32 function; @@ -145,14 +168,21 @@ static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu) } } -static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu) +static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu, + struct kvm_cpuid_entry2 *entries, int nent) { u32 base = vcpu->arch.kvm_cpuid_base; if (!base) return NULL; - return kvm_find_cpuid_entry(vcpu, base | KVM_CPUID_FEATURES, 0); + return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES, 0); +} + +static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu) +{ + return __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries, + vcpu->arch.cpuid_nent); } void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) @@ -167,11 +197,28 @@ void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) vcpu->arch.pv_cpuid.features = best->eax; } -void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) +/* + * Calculate guest's supported XCR0 taking into account guest CPUID data and + * supported_xcr0 (comprised of host configuration and KVM_SUPPORTED_XCR0). + */ +static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent) { struct kvm_cpuid_entry2 *best; - best = kvm_find_cpuid_entry(vcpu, 1, 0); + best = cpuid_entry2_find(entries, nent, 0xd, 0); + if (!best) + return 0; + + return (best->eax | ((u64)best->edx << 32)) & supported_xcr0; +} + +static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries, + int nent) +{ + struct kvm_cpuid_entry2 *best; + u64 guest_supported_xcr0 = cpuid_get_supported_xcr0(entries, nent); + + best = cpuid_entry2_find(entries, nent, 1, 0); if (best) { /* Update OSXSAVE bit */ if (boot_cpu_has(X86_FEATURE_XSAVE)) @@ -182,32 +229,52 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE); } - best = kvm_find_cpuid_entry(vcpu, 7, 0); + best = cpuid_entry2_find(entries, nent, 7, 0); if (best && boot_cpu_has(X86_FEATURE_PKU) && best->function == 0x7) cpuid_entry_change(best, X86_FEATURE_OSPKE, kvm_read_cr4_bits(vcpu, X86_CR4_PKE)); - best = kvm_find_cpuid_entry(vcpu, 0xD, 0); + best = cpuid_entry2_find(entries, nent, 0xD, 0); if (best) best->ebx = xstate_required_size(vcpu->arch.xcr0, false); - best = kvm_find_cpuid_entry(vcpu, 0xD, 1); + best = cpuid_entry2_find(entries, nent, 0xD, 1); if (best && (cpuid_entry_has(best, X86_FEATURE_XSAVES) || cpuid_entry_has(best, X86_FEATURE_XSAVEC))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); - best = kvm_find_kvm_cpuid_features(vcpu); + best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent); if (kvm_hlt_in_guest(vcpu->kvm) && best && (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) { - best = kvm_find_cpuid_entry(vcpu, 0x1, 0); + best = cpuid_entry2_find(entries, nent, 0x1, 0); if (best) 
cpuid_entry_change(best, X86_FEATURE_MWAIT, vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT); } + + /* + * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate + * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's + * requested XCR0 value. The enclave's XFRM must be a subset of XCR0 + * at the time of EENTER, thus adjust the allowed XFRM by the guest's + * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to + * '1' even on CPUs that don't support XSAVE. + */ + best = cpuid_entry2_find(entries, nent, 0x12, 0x1); + if (best) { + best->ecx &= guest_supported_xcr0 & 0xffffffff; + best->edx &= guest_supported_xcr0 >> 32; + best->ecx |= XFEATURE_MASK_FPSSE; + } +} + +void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu) +{ + __kvm_update_cpuid_runtime(vcpu, vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); } EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime); @@ -226,27 +293,8 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_apic_set_version(vcpu); } - best = kvm_find_cpuid_entry(vcpu, 0xD, 0); - if (!best) - vcpu->arch.guest_supported_xcr0 = 0; - else - vcpu->arch.guest_supported_xcr0 = - (best->eax | ((u64)best->edx << 32)) & supported_xcr0; - - /* - * Bits 127:0 of the allowed SECS.ATTRIBUTES (CPUID.0x12.0x1) enumerate - * the supported XSAVE Feature Request Mask (XFRM), i.e. the enclave's - * requested XCR0 value. The enclave's XFRM must be a subset of XCRO - * at the time of EENTER, thus adjust the allowed XFRM by the guest's - * supported XCR0. Similar to XCR0 handling, FP and SSE are forced to - * '1' even on CPUs that don't support XSAVE. - */ - best = kvm_find_cpuid_entry(vcpu, 0x12, 0x1); - if (best) { - best->ecx &= vcpu->arch.guest_supported_xcr0 & 0xffffffff; - best->edx &= vcpu->arch.guest_supported_xcr0 >> 32; - best->ecx |= XFEATURE_MASK_FPSSE; - } + vcpu->arch.guest_supported_xcr0 = + cpuid_get_supported_xcr0(vcpu->arch.cpuid_entries, vcpu->arch.cpuid_nent); kvm_update_pv_runtime(vcpu); @@ -298,6 +346,28 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, { int r; + __kvm_update_cpuid_runtime(vcpu, e2, nent); + + /* + * KVM does not correctly handle changing guest CPUID after KVM_RUN, as + * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc. aren't + * tracked in kvm_mmu_page_role. As a result, KVM may miss guest page + * faults due to reusing SPs/SPTEs. In practice no sane VMM mucks with + * the core vCPU model on the fly. It would've been better to forbid any + * KVM_SET_CPUID{,2} calls after KVM_RUN altogether but unfortunately + * some VMMs (e.g. QEMU) reuse vCPU fds for CPU hotplug/unplug and do + * KVM_SET_CPUID{,2} again. To support this legacy behavior, check + * whether the supplied CPUID data is equal to what's already set.
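The check described above preserves a specific legacy userspace flow. A hedged VMM-side sketch of that flow follows; it uses the real KVM_CREATE_VM/KVM_CREATE_VCPU/KVM_SET_CPUID2 ioctls, but the CPUID contents are placeholders and all error handling is elided, so treat it as an illustration rather than a working VMM.

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
        /* kvm_cpuid2 ends in a zero-length array, so wrap it with storage. */
        struct { struct kvm_cpuid2 hdr; struct kvm_cpuid_entry2 ent[1]; } cpuid;
        int sys_fd = open("/dev/kvm", O_RDWR);
        int vm_fd = ioctl(sys_fd, KVM_CREATE_VM, 0);
        int vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);

        memset(&cpuid, 0, sizeof(cpuid));
        cpuid.hdr.nent = 1;
        cpuid.ent[0].function = 0;      /* placeholder leaf */

        ioctl(vcpu_fd, KVM_SET_CPUID2, &cpuid); /* initial setup: accepted */
        /* ... KVM_RUN would happen here ... */
        ioctl(vcpu_fd, KVM_SET_CPUID2, &cpuid); /* identical data: still accepted */

        cpuid.ent[0].eax = 1;                   /* mutate the vCPU model... */
        if (ioctl(vcpu_fd, KVM_SET_CPUID2, &cpuid) < 0)
                perror("KVM_SET_CPUID2 after KVM_RUN with changed data");
        return 0;
}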
+ */ + if (vcpu->arch.last_vmentry_cpu != -1) { + r = kvm_cpuid_check_equal(vcpu, e2, nent); + if (r) + return r; + + kvfree(e2); + return 0; + } + r = kvm_check_cpuid(vcpu, e2, nent); if (r) return r; @@ -307,7 +377,6 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, vcpu->arch.cpuid_nent = nent; kvm_update_kvm_cpuid_base(vcpu); - kvm_update_cpuid_runtime(vcpu); kvm_vcpu_after_set_cpuid(vcpu); return 0; @@ -485,12 +554,13 @@ void kvm_set_cpu_caps(void) ); kvm_cpu_cap_mask(CPUID_7_0_EBX, - F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | - F(BMI2) | F(ERMS) | F(INVPCID) | F(RTM) | 0 /*MPX*/ | F(RDSEED) | - F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) | - F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) | - F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | 0 /*INTEL_PT*/ - ); + F(FSGSBASE) | F(SGX) | F(BMI1) | F(HLE) | F(AVX2) | + F(FDP_EXCPTN_ONLY) | F(SMEP) | F(BMI2) | F(ERMS) | F(INVPCID) | + F(RTM) | F(ZERO_FCS_FDS) | 0 /*MPX*/ | F(AVX512F) | + F(AVX512DQ) | F(RDSEED) | F(ADX) | F(SMAP) | F(AVX512IFMA) | + F(CLFLUSHOPT) | F(CLWB) | 0 /*INTEL_PT*/ | F(AVX512PF) | + F(AVX512ER) | F(AVX512CD) | F(SHA_NI) | F(AVX512BW) | + F(AVX512VL)); kvm_cpu_cap_mask(CPUID_7_ECX, F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ | F(RDPID) | @@ -795,10 +865,10 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) perf_get_x86_pmu_capability(&cap); /* - * Only support guest architectural pmu on a host - * with architectural pmu. + * The guest architecture pmu is only supported if the architecture + * pmu exists on the host and the module parameters allow it. */ - if (!cap.version) + if (!cap.version || !enable_pmu) memset(&cap, 0, sizeof(cap)); eax.split.version_id = min(cap.version, 2); @@ -837,13 +907,14 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) } break; case 0xd: { - u64 guest_perm = xstate_get_guest_group_perm(); + u64 permitted_xcr0 = supported_xcr0 & xstate_get_guest_group_perm(); + u64 permitted_xss = supported_xss; - entry->eax &= supported_xcr0 & guest_perm; - entry->ebx = xstate_required_size(supported_xcr0, false); + entry->eax &= permitted_xcr0; + entry->ebx = xstate_required_size(permitted_xcr0, false); entry->ecx = entry->ebx; - entry->edx &= (supported_xcr0 & guest_perm) >> 32; - if (!supported_xcr0) + entry->edx &= permitted_xcr0 >> 32; + if (!permitted_xcr0) break; entry = do_host_cpuid(array, function, 1); @@ -852,20 +923,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) cpuid_entry_override(entry, CPUID_D_1_EAX); if (entry->eax & (F(XSAVES)|F(XSAVEC))) - entry->ebx = xstate_required_size(supported_xcr0 | supported_xss, + entry->ebx = xstate_required_size(permitted_xcr0 | permitted_xss, true); else { - WARN_ON_ONCE(supported_xss != 0); + WARN_ON_ONCE(permitted_xss != 0); entry->ebx = 0; } - entry->ecx &= supported_xss; - entry->edx &= supported_xss >> 32; + entry->ecx &= permitted_xss; + entry->edx &= permitted_xss >> 32; for (i = 2; i < 64; ++i) { bool s_state; - if (supported_xcr0 & BIT_ULL(i)) + if (permitted_xcr0 & BIT_ULL(i)) s_state = false; - else if (supported_xss & BIT_ULL(i)) + else if (permitted_xss & BIT_ULL(i)) s_state = true; else continue; @@ -879,13 +950,16 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) * invalid sub-leafs. Only valid sub-leafs should * reach this point, and they should have a non-zero * save state size. 
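The CPUID 0xD walk above mirrors what the host itself enumerates. A hedged userspace sketch of the same walk, using GCC's __get_cpuid_count() intrinsic; the interpretation of EAX/EBX/ECX follows the architectural definition of leaf 0xD referenced by the code above.

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx, i;

        for (i = 2; i < 64; i++) {
                if (!__get_cpuid_count(0xd, i, &eax, &ebx, &ecx, &edx))
                        break;
                if (!eax)
                        continue;       /* invalid sub-leaf: zero save size */
                printf("sub-leaf %2u: size %u, offset %u, %s-managed\n",
                       i, eax, ebx, (ecx & 1) ? "IA32_XSS" : "XCR0");
        }
        return 0;
}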
Furthermore, check whether the - * processor agrees with supported_xcr0/supported_xss + * processor agrees with permitted_xcr0/permitted_xss * on whether this is an XCR0- or IA32_XSS-managed area. */ if (WARN_ON_ONCE(!entry->eax || (entry->ecx & 0x1) != s_state)) { --array->nent; continue; } + + if (!kvm_cpu_cap_has(X86_FEATURE_XFD)) + entry->ecx &= ~BIT_ULL(2); entry->edx = 0; } break; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index c5028e6b0f96d10244e7248e51a52c9bd2c76d44..d7e6fde82d254408fe3b208eb5631d1c718c71a7 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1096,14 +1096,8 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, apic->regs + APIC_TMR); } - if (static_call(kvm_x86_deliver_posted_interrupt)(vcpu, vector)) { - kvm_lapic_set_irr(vector, apic); - kvm_make_request(KVM_REQ_EVENT, vcpu); - kvm_vcpu_kick(vcpu); - } else { - trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode, - trig_mode, vector); - } + static_call(kvm_x86_deliver_interrupt)(apic, delivery_mode, + trig_mode, vector); break; case APIC_DM_REMRD: @@ -1950,7 +1944,6 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu) { restart_apic_timer(vcpu->arch.apic); } -EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer); void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu) { @@ -1962,7 +1955,6 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu) start_sw_timer(apic); preempt_enable(); } -EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer); void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu) { @@ -2631,7 +2623,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) kvm_apic_set_version(vcpu); apic_update_ppr(apic); - hrtimer_cancel(&apic->lapic_timer.timer); + cancel_apic_timer(apic); apic->lapic_timer.expired_tscdeadline = 0; apic_update_lvtt(apic); apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 1d275e9d76b5fba5497c2e26c94409549c3ea534..593093b523953802334d5f83767efadc403892c7 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5756,6 +5756,7 @@ static bool __kvm_zap_rmaps(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) continue; flush = slot_handle_level_range(kvm, memslot, kvm_zap_rmapp, + PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL, start, end - 1, true, flush); } @@ -5825,15 +5826,27 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, } /* - * We can flush all the TLBs out of the mmu lock without TLB - * corruption since we just change the spte from writable to - * readonly so that we only need to care the case of changing - * spte from present to present (changing the spte from present - * to nonpresent will flush all the TLBs immediately), in other - * words, the only case we care is mmu_spte_update() where we - * have checked Host-writable | MMU-writable instead of - * PT_WRITABLE_MASK, that means it does not depend on PT_WRITABLE_MASK - * anymore. + * Flush TLBs if any SPTEs had to be write-protected to ensure that + * guest writes are reflected in the dirty bitmap before the memslot + * update completes, i.e. before enabling dirty logging is visible to + * userspace. + * + * Perform the TLB flush outside the mmu_lock to reduce the amount of + * time the lock is held. However, this does mean that another CPU can + * now grab mmu_lock and encounter a write-protected SPTE while CPUs + * still have a writable mapping for the associated GFN in their TLB. 
+ * + * This is safe but requires KVM to be careful when making decisions + * based on the write-protection status of an SPTE. Specifically, KVM + * also write-protects SPTEs to monitor changes to guest page tables + * during shadow paging, and must guarantee no CPUs can write to those + * pages before the lock is dropped. As mentioned in the previous + * paragraph, a write-protected SPTE is no guarantee that CPUs cannot + * perform writes. So to determine if a TLB flush is truly required, KVM + * will clear a separate software-only bit (MMU-writable) and skip the + * flush if-and-only-if this bit was already clear. + * + * See DEFAULT_SPTE_MMU_WRITEABLE for more details. */ if (flush) kvm_arch_flush_remote_tlbs_memslot(kvm, memslot); diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 351b04ad62a18f10b11962b7b4db433171a8f5d9..73cfe62fdad1fd4ac55e0dba7e04466abf9bc33d 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -216,6 +216,7 @@ u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn) new_spte &= ~PT_WRITABLE_MASK; new_spte &= ~shadow_host_writable_mask; + new_spte &= ~shadow_mmu_writable_mask; new_spte = mark_spte_for_access_track(new_spte); diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index a4af2a42695c1bfffe8f5b161007ab5e9a872da9..be6a007a4af3111831396149f641f1c4a6b77964 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -60,10 +60,6 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0); (((address) >> PT64_LEVEL_SHIFT(level)) & ((1 << PT64_LEVEL_BITS) - 1)) #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) -/* Bits 9 and 10 are ignored by all non-EPT PTEs. */ -#define DEFAULT_SPTE_HOST_WRITEABLE BIT_ULL(9) -#define DEFAULT_SPTE_MMU_WRITEABLE BIT_ULL(10) - /* * The mask/shift to use for saving the original R/X bits when marking the PTE * as not-present for access tracking purposes. We do not save the W bit as the @@ -78,6 +74,35 @@ static_assert(SPTE_TDP_AD_ENABLED_MASK == 0); SHADOW_ACC_TRACK_SAVED_BITS_SHIFT) static_assert(!(SPTE_TDP_AD_MASK & SHADOW_ACC_TRACK_SAVED_MASK)); +/* + * *_SPTE_HOST_WRITEABLE (aka Host-writable) indicates whether the host permits + * writes to the guest page mapped by the SPTE. This bit is cleared on SPTEs + * that map guest pages in read-only memslots and read-only VMAs. + * + * Invariants: + * - If Host-writable is clear, PT_WRITABLE_MASK must be clear. + * + * + * *_SPTE_MMU_WRITEABLE (aka MMU-writable) indicates whether the shadow MMU + * allows writes to the guest page mapped by the SPTE. This bit is cleared when + * the guest page mapped by the SPTE contains a page table that is being + * monitored for shadow paging. In this case the SPTE can only be made writable + * by unsyncing the shadow page under the mmu_lock. + * + * Invariants: + * - If MMU-writable is clear, PT_WRITABLE_MASK must be clear. + * - If MMU-writable is set, Host-writable must be set. + * + * If MMU-writable is set, PT_WRITABLE_MASK is normally set but can be cleared + * to track writes for dirty logging. For such SPTEs, KVM will locklessly set + * PT_WRITABLE_MASK upon the next write from the guest and record the write in + * the dirty log (see fast_page_fault()). + */ + +/* Bits 9 and 10 are ignored by all non-EPT PTEs.
*/ +#define DEFAULT_SPTE_HOST_WRITEABLE BIT_ULL(9) +#define DEFAULT_SPTE_MMU_WRITEABLE BIT_ULL(10) + /* * Low ignored bits are at a premium for EPT, use high ignored bits, taking care * to not overlap the A/D type mask or the saved access bits of access-tracked @@ -316,8 +341,13 @@ static __always_inline bool is_rsvd_spte(struct rsvd_bits_validate *rsvd_check, static inline bool spte_can_locklessly_be_made_writable(u64 spte) { - return (spte & shadow_host_writable_mask) && - (spte & shadow_mmu_writable_mask); + if (spte & shadow_mmu_writable_mask) { + WARN_ON_ONCE(!(spte & shadow_host_writable_mask)); + return true; + } + + WARN_ON_ONCE(spte & PT_WRITABLE_MASK); + return false; } static inline u64 get_mmio_spte_generation(u64 spte) diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 7b1bc816b7c38d1e3603e61ba072c413ea0fe62f..bc9e3553fba2d4ac0d0853ce74ad1cd2363036fa 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1442,12 +1442,12 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root, !is_last_spte(iter.old_spte, iter.level)) continue; - if (!is_writable_pte(iter.old_spte)) - break; - new_spte = iter.old_spte & ~(PT_WRITABLE_MASK | shadow_mmu_writable_mask); + if (new_spte == iter.old_spte) + break; + tdp_mmu_set_spte(kvm, &iter, new_spte); spte_set = true; } diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 261b39cbef6ea52c77473a097839cf1f08438387..f614f95acc6b3e38b0a928e42cdd0f43c9180e02 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -13,6 +13,8 @@ #include <linux/types.h> #include <linux/kvm_host.h> #include <linux/perf_event.h> +#include <linux/bsearch.h> +#include <linux/sort.h> #include <asm/perf_event.h> #include "x86.h" #include "cpuid.h" @@ -109,6 +111,9 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, .config = config, }; + if (type == PERF_TYPE_HARDWARE && config >= PERF_COUNT_HW_MAX) + return; + attr.sample_period = get_sample_period(pmc, pmc->counter); if (in_tx) @@ -169,12 +174,16 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc) return true; } +static int cmp_u64(const void *a, const void *b) +{ + /* Compare rather than subtract: a u64 difference truncated to int can report the wrong sign. */ + return (*(__u64 *)a > *(__u64 *)b) - (*(__u64 *)a < *(__u64 *)b); +} + void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) { unsigned config, type = PERF_TYPE_RAW; struct kvm *kvm = pmc->vcpu->kvm; struct kvm_pmu_event_filter *filter; - int i; bool allow_event = true; if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) @@ -189,16 +198,13 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu); if (filter) { - for (i = 0; i < filter->nevents; i++) - if (filter->events[i] == - (eventsel & AMD64_RAW_EVENT_MASK_NB)) - break; - if (filter->action == KVM_PMU_EVENT_ALLOW && - i == filter->nevents) - allow_event = false; - if (filter->action == KVM_PMU_EVENT_DENY && - i < filter->nevents) - allow_event = false; + __u64 key = eventsel & AMD64_RAW_EVENT_MASK_NB; + + if (bsearch(&key, filter->events, filter->nevents, + sizeof(__u64), cmp_u64)) + allow_event = filter->action == KVM_PMU_EVENT_ALLOW; + else + allow_event = filter->action == KVM_PMU_EVENT_DENY; } if (!allow_event) return; @@ -573,6 +579,11 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp) /* Ensure nevents can't be changed between the user copies. */ *filter = tmp; + /* + * Sort the in-kernel list so that we can search it with bsearch.
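The filter change above is the classic sort-once, binary-search-per-lookup scheme: the list is sorted a single time when userspace installs it, then each reprogrammed event does an O(log n) membership test. A minimal userspace analog using qsort()/bsearch(); note the comparator compares instead of subtracting, since a u64 difference truncated to int can report the wrong ordering for values that differ only in the high bits.

#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>

static int cmp_u64(const void *a, const void *b)
{
        uint64_t x = *(const uint64_t *)a, y = *(const uint64_t *)b;

        return (x > y) - (x < y);       /* -1, 0 or 1; never overflows */
}

int main(void)
{
        uint64_t events[] = { 0x1000000000ULL, 0x3c, 0xc0 };
        uint64_t key = 0xc0;
        size_t n = sizeof(events) / sizeof(events[0]);

        qsort(events, n, sizeof(events[0]), cmp_u64);   /* sort once */
        printf("event allowed: %s\n",
               bsearch(&key, events, n, sizeof(events[0]), cmp_u64)
               ? "yes" : "no");                         /* search many times */
        return 0;
}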
+ */ + sort(&filter->events, filter->nevents, sizeof(__u64), cmp_u64, NULL); + mutex_lock(&kvm->lock); filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter, mutex_is_locked(&kvm->lock)); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 0e5b492940867b037236959a76629ce489429d0e..90364d02f22aada1bf890b85eb2ccf9311e98c02 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -295,13 +295,16 @@ static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source, struct kvm_vcpu *vcpu; unsigned long i; + /* + * Wake any target vCPUs that are blocking, i.e. waiting for a wake + * event. There's no need to signal doorbells, as hardware has handled + * vCPUs that were in guest at the time of the IPI, and vCPUs that have + * since entered the guest will have processed pending IRQs at VMRUN. + */ kvm_for_each_vcpu(i, vcpu, kvm) { - bool m = kvm_apic_match_dest(vcpu, source, - icrl & APIC_SHORT_MASK, - GET_APIC_DEST_FIELD(icrh), - icrl & APIC_DEST_MASK); - - if (m && !avic_vcpu_is_running(vcpu)) + if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK, + GET_APIC_DEST_FIELD(icrh), + icrl & APIC_DEST_MASK)) kvm_vcpu_wake_up(vcpu); } } @@ -672,9 +675,22 @@ int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) return -1; kvm_lapic_set_irr(vec, vcpu->arch.apic); + + /* + * Pairs with the smp_mb_*() after setting vcpu->guest_mode in + * vcpu_enter_guest() to ensure the write to the vIRR is ordered before + * the read of guest_mode, which guarantees that either VMRUN will see + * and process the new vIRR entry, or that the below code will signal + * the doorbell if the vCPU is already running in the guest. + */ smp_mb__after_atomic(); - if (avic_vcpu_is_running(vcpu)) { + /* + * Signal the doorbell to tell hardware to inject the IRQ if the vCPU + * is in the guest. If the vCPU is not in the guest, hardware will + * automatically process AVIC interrupts at VMRUN. + */ + if (vcpu->mode == IN_GUEST_MODE) { int cpu = READ_ONCE(vcpu->cpu); /* @@ -688,8 +704,13 @@ int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) if (cpu != get_cpu()) wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpu)); put_cpu(); - } else + } else { + /* + * Wake the vCPU if it was blocking. KVM will then detect the + * pending IRQ when checking if the vCPU has a wake event. + */ kvm_vcpu_wake_up(vcpu); + } return 0; } @@ -957,6 +978,8 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) int h_physical_id = kvm_cpu_get_apicid(cpu); struct vcpu_svm *svm = to_svm(vcpu); + lockdep_assert_preemption_disabled(); + /* * Since the host physical APIC id is 8 bits, * we can support host APIC ID upto 255. @@ -964,19 +987,25 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK)) return; + /* + * No need to update anything if the vCPU is blocking, i.e. if the vCPU + * is being scheduled in after being preempted. The CPU entries in the + * Physical APIC table and IRTE are consumed iff IsRun{ning} is '1'. + * If the vCPU was migrated, its new CPU value will be stuffed when the + * vCPU unblocks. 
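The AVIC delivery path above depends on a barrier-pairing contract: the sender orders its vIRR write before reading guest_mode, and the VM-entry side orders its guest_mode write before reading the vIRR, so at least one side always observes the other. A hedged two-thread sketch of that pattern using C11 atomics (the flag names are inventions of this example, and seq_cst fences stand in for the kernel's smp_mb variants):

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

static atomic_int irr_bit, in_guest;

static void *sender(void *arg)
{
        atomic_store(&irr_bit, 1);                      /* post the interrupt */
        atomic_thread_fence(memory_order_seq_cst);      /* like smp_mb__after_atomic() */
        if (atomic_load(&in_guest))
                puts("sender: signal the doorbell");
        else
                puts("sender: wake the vCPU");
        return NULL;
}

static void *vcpu(void *arg)
{
        atomic_store(&in_guest, 1);                     /* entering the guest */
        atomic_thread_fence(memory_order_seq_cst);      /* VM-entry side barrier */
        if (atomic_load(&irr_bit))
                puts("vcpu: process pending vIRR at VMRUN");
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, sender, NULL);
        pthread_create(&b, NULL, vcpu, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        return 0;
}

Whatever interleaving occurs, the fences rule out the outcome where the sender sees in_guest == 0 and the vCPU sees irr_bit == 0, which is exactly the lost-interrupt case the comments above guard against.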
+ */ + if (kvm_vcpu_is_blocking(vcpu)) + return; + entry = READ_ONCE(*(svm->avic_physical_id_cache)); WARN_ON(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK; entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK); - - entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; - if (svm->avic_is_running) - entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; + entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; WRITE_ONCE(*(svm->avic_physical_id_cache), entry); - avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, - svm->avic_is_running); + avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true); } void avic_vcpu_put(struct kvm_vcpu *vcpu) @@ -984,42 +1013,56 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu) u64 entry; struct vcpu_svm *svm = to_svm(vcpu); + lockdep_assert_preemption_disabled(); + entry = READ_ONCE(*(svm->avic_physical_id_cache)); - if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK) - avic_update_iommu_vcpu_affinity(vcpu, -1, 0); + + /* Nothing to do if IsRunning == '0' due to vCPU blocking. */ + if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)) + return; + + avic_update_iommu_vcpu_affinity(vcpu, -1, 0); entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; WRITE_ONCE(*(svm->avic_physical_id_cache), entry); } -/* - * This function is called during VCPU halt/unhalt. - */ -static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run) +void avic_vcpu_blocking(struct kvm_vcpu *vcpu) { - struct vcpu_svm *svm = to_svm(vcpu); - int cpu = get_cpu(); - - WARN_ON(cpu != vcpu->cpu); - svm->avic_is_running = is_run; + if (!kvm_vcpu_apicv_active(vcpu)) + return; - if (kvm_vcpu_apicv_active(vcpu)) { - if (is_run) - avic_vcpu_load(vcpu, cpu); - else - avic_vcpu_put(vcpu); - } - put_cpu(); + preempt_disable(); + + /* + * Unload the AVIC when the vCPU is about to block, _before_ + * the vCPU actually blocks. + * + * Any IRQs that arrive before IsRunning=0 will not cause an + * incomplete IPI vmexit on the source, therefore vIRR will also + * be checked by kvm_vcpu_check_block() before blocking. The + * memory barrier implicit in set_current_state orders writing + * IsRunning=0 before reading the vIRR. The processor needs a + * matching memory barrier on interrupt delivery between writing + * IRR and reading IsRunning; the lack of this barrier might be + * the cause of errata #1235). + */ + avic_vcpu_put(vcpu); + + preempt_enable(); } -void svm_vcpu_blocking(struct kvm_vcpu *vcpu) +void avic_vcpu_unblocking(struct kvm_vcpu *vcpu) { - avic_set_running(vcpu, false); -} + int cpu; -void svm_vcpu_unblocking(struct kvm_vcpu *vcpu) -{ - if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu)) - kvm_vcpu_update_apicv(vcpu); - avic_set_running(vcpu, true); + if (!kvm_vcpu_apicv_active(vcpu)) + return; + + cpu = get_cpu(); + WARN_ON(cpu != vcpu->cpu); + + avic_vcpu_load(vcpu, cpu); + + put_cpu(); } diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index cf206855ebf099a1a2e9ca1304b8fbd7235502fc..1218b5a342fc8574f3da905a190fce0713291438 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -983,9 +983,9 @@ void svm_free_nested(struct vcpu_svm *svm) /* * Forcibly leave nested mode in order to be able to reset the VCPU later on. 
*/ -void svm_leave_nested(struct vcpu_svm *svm) +void svm_leave_nested(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu = &svm->vcpu; + struct vcpu_svm *svm = to_svm(vcpu); if (is_guest_mode(vcpu)) { svm->nested.nested_run_pending = 0; @@ -1411,7 +1411,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, return -EINVAL; if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) { - svm_leave_nested(svm); + svm_leave_nested(vcpu); svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); return 0; } @@ -1478,7 +1478,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, */ if (is_guest_mode(vcpu)) - svm_leave_nested(svm); + svm_leave_nested(vcpu); else svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save; @@ -1532,6 +1532,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) } struct kvm_x86_nested_ops svm_nested_ops = { + .leave_nested = svm_leave_nested, .check_events = svm_check_nested_events, .triple_fault = nested_svm_triple_fault, .get_nested_state_pages = svm_get_nested_state_pages, diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 12d8b301065ab940f0ee3c71d6f0e47d49038fbb..5aa45f13b16dca9e1b071c2bcd630641f6c719b9 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -101,7 +101,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr, { struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); - if (!pmu) + if (!enable_pmu) return NULL; switch (msr) { diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 6a22798eaaee9d50ed2f49137d3a56fe67316fda..17b53457d866416a15cf872ac40169bd0f9230fd 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2100,8 +2100,13 @@ void __init sev_hardware_setup(void) if (!sev_enabled || !npt_enabled) goto out; - /* Does the CPU support SEV? */ - if (!boot_cpu_has(X86_FEATURE_SEV)) + /* + * SEV must obviously be supported in hardware. Sanity check that the + * CPU supports decode assists, which is mandatory for SEV guests to + * support instruction emulation. + */ + if (!boot_cpu_has(X86_FEATURE_SEV) || + WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_DECODEASSISTS))) goto out; /* Retrieve SEV CPUID information */ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 46bcc706f25740b3f0a1956a4d2124fda095306d..a290efb272ad1641f36c33b8b6bbca22bdf89a10 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -192,10 +192,6 @@ module_param(vgif, int, 0444); static int lbrv = true; module_param(lbrv, int, 0444); -/* enable/disable PMU virtualization */ -bool pmu = true; -module_param(pmu, bool, 0444); - static int tsc_scaling = true; module_param(tsc_scaling, int, 0444); @@ -294,7 +290,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) { if (!(efer & EFER_SVME)) { - svm_leave_nested(svm); + svm_leave_nested(vcpu); svm_set_gif(svm, true); /* #GP intercept is still needed for vmware backdoor */ if (!enable_vmware_backdoor) @@ -316,7 +312,11 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) return ret; } - if (svm_gp_erratum_intercept) + /* + * Never intercept #GP for SEV guests, KVM can't + * decrypt guest memory to workaround the erratum. + */ + if (svm_gp_erratum_intercept && !sev_guest(vcpu->kvm)) set_exception_intercept(svm, GP_VECTOR); } } @@ -873,47 +873,6 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) } } -/* - * The default MMIO mask is a single bit (excluding the present bit), - * which could conflict with the memory encryption bit. 
Check for - * memory encryption support and override the default MMIO mask if - * memory encryption is enabled. - */ -static __init void svm_adjust_mmio_mask(void) -{ - unsigned int enc_bit, mask_bit; - u64 msr, mask; - - /* If there is no memory encryption support, use existing mask */ - if (cpuid_eax(0x80000000) < 0x8000001f) - return; - - /* If memory encryption is not enabled, use existing mask */ - rdmsrl(MSR_AMD64_SYSCFG, msr); - if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) - return; - - enc_bit = cpuid_ebx(0x8000001f) & 0x3f; - mask_bit = boot_cpu_data.x86_phys_bits; - - /* Increment the mask bit if it is the same as the encryption bit */ - if (enc_bit == mask_bit) - mask_bit++; - - /* - * If the mask bit location is below 52, then some bits above the - * physical addressing limit will always be reserved, so use the - * rsvd_bits() function to generate the mask. This mask, along with - * the present bit, will be used to generate a page fault with - * PFER.RSV = 1. - * - * If the mask bit location is 52 (or above), then clear the mask. - */ - mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0; - - kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK); -} - static void svm_hardware_teardown(void) { int cpu; @@ -928,198 +887,6 @@ static void svm_hardware_teardown(void) iopm_base = 0; } -static __init void svm_set_cpu_caps(void) -{ - kvm_set_cpu_caps(); - - supported_xss = 0; - - /* CPUID 0x80000001 and 0x8000000A (SVM features) */ - if (nested) { - kvm_cpu_cap_set(X86_FEATURE_SVM); - - if (nrips) - kvm_cpu_cap_set(X86_FEATURE_NRIPS); - - if (npt_enabled) - kvm_cpu_cap_set(X86_FEATURE_NPT); - - if (tsc_scaling) - kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR); - - /* Nested VM can receive #VMEXIT instead of triggering #GP */ - kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK); - } - - /* CPUID 0x80000008 */ - if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) || - boot_cpu_has(X86_FEATURE_AMD_SSBD)) - kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD); - - /* AMD PMU PERFCTR_CORE CPUID */ - if (pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) - kvm_cpu_cap_set(X86_FEATURE_PERFCTR_CORE); - - /* CPUID 0x8000001F (SME/SEV features) */ - sev_set_cpu_caps(); -} - -static __init int svm_hardware_setup(void) -{ - int cpu; - struct page *iopm_pages; - void *iopm_va; - int r; - unsigned int order = get_order(IOPM_SIZE); - - /* - * NX is required for shadow paging and for NPT if the NX huge pages - * mitigation is enabled. 
- */ - if (!boot_cpu_has(X86_FEATURE_NX)) { - pr_err_ratelimited("NX (Execute Disable) not supported\n"); - return -EOPNOTSUPP; - } - kvm_enable_efer_bits(EFER_NX); - - iopm_pages = alloc_pages(GFP_KERNEL, order); - - if (!iopm_pages) - return -ENOMEM; - - iopm_va = page_address(iopm_pages); - memset(iopm_va, 0xff, PAGE_SIZE * (1 << order)); - iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; - - init_msrpm_offsets(); - - supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); - - if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) - kvm_enable_efer_bits(EFER_FFXSR); - - if (tsc_scaling) { - if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { - tsc_scaling = false; - } else { - pr_info("TSC scaling supported\n"); - kvm_has_tsc_control = true; - kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX; - kvm_tsc_scaling_ratio_frac_bits = 32; - } - } - - tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX); - - /* Check for pause filtering support */ - if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { - pause_filter_count = 0; - pause_filter_thresh = 0; - } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) { - pause_filter_thresh = 0; - } - - if (nested) { - printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); - kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); - } - - /* - * KVM's MMU doesn't support using 2-level paging for itself, and thus - * NPT isn't supported if the host is using 2-level paging since host - * CR4 is unchanged on VMRUN. - */ - if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE)) - npt_enabled = false; - - if (!boot_cpu_has(X86_FEATURE_NPT)) - npt_enabled = false; - - /* Force VM NPT level equal to the host's paging level */ - kvm_configure_mmu(npt_enabled, get_npt_level(), - get_npt_level(), PG_LEVEL_1G); - pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis"); - - /* Note, SEV setup consumes npt_enabled. */ - sev_hardware_setup(); - - svm_hv_hardware_setup(); - - svm_adjust_mmio_mask(); - - for_each_possible_cpu(cpu) { - r = svm_cpu_init(cpu); - if (r) - goto err; - } - - if (nrips) { - if (!boot_cpu_has(X86_FEATURE_NRIPS)) - nrips = false; - } - - enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC); - - if (enable_apicv) { - pr_info("AVIC enabled\n"); - - amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); - } - - if (vls) { - if (!npt_enabled || - !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) || - !IS_ENABLED(CONFIG_X86_64)) { - vls = false; - } else { - pr_info("Virtual VMLOAD VMSAVE supported\n"); - } - } - - if (boot_cpu_has(X86_FEATURE_SVME_ADDR_CHK)) - svm_gp_erratum_intercept = false; - - if (vgif) { - if (!boot_cpu_has(X86_FEATURE_VGIF)) - vgif = false; - else - pr_info("Virtual GIF supported\n"); - } - - if (lbrv) { - if (!boot_cpu_has(X86_FEATURE_LBRV)) - lbrv = false; - else - pr_info("LBR virtualization supported\n"); - } - - if (!pmu) - pr_info("PMU virtualization is disabled\n"); - - svm_set_cpu_caps(); - - /* - * It seems that on AMD processors PTE's accessed bit is - * being set by the CPU hardware before the NPF vmexit. - * This is not expected behaviour and our tests fail because - * of it. - * A workaround here is to disable support for - * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled. 
- * In this case userspace can know if there is support using - * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle - * it - * If future AMD CPU models change the behaviour described above, - * this variable can be changed accordingly - */ - allow_smaller_maxphyaddr = !npt_enabled; - - return 0; - -err: - svm_hardware_teardown(); - return r; -} - static void init_seg(struct vmcb_seg *seg) { seg->selector = 0; @@ -1247,9 +1014,10 @@ static void init_vmcb(struct kvm_vcpu *vcpu) * Guest access to VMware backdoor ports could legitimately * trigger #GP because of TSS I/O permission bitmap. * We intercept those #GP and allow access to them anyway - * as VMware does. + * as VMware does. Don't intercept #GP for SEV guests as KVM can't + * decrypt guest memory to decode the faulting instruction. */ - if (enable_vmware_backdoor) + if (enable_vmware_backdoor && !sev_guest(vcpu->kvm)) set_exception_intercept(svm, GP_VECTOR); svm_set_intercept(svm, INTERCEPT_INTR); @@ -1444,12 +1212,6 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) if (err) goto error_free_vmsa_page; - /* We initialize this flag to true to make sure that the is_running - * bit would be set the first time the vcpu is loaded. - */ - if (irqchip_in_kernel(vcpu->kvm) && kvm_apicv_activated(vcpu->kvm)) - svm->avic_is_running = true; - svm->msrpm = svm_vcpu_alloc_msrpm(); if (!svm->msrpm) { err = -ENOMEM; @@ -2334,10 +2096,6 @@ static int gp_interception(struct kvm_vcpu *vcpu) if (error_code) goto reinject; - /* All SVM instructions expect page aligned RAX */ - if (svm->vmcb->save.rax & ~PAGE_MASK) - goto reinject; - /* Decode the instruction for usage later */ if (x86_decode_emulated_instruction(vcpu, 0, NULL, 0) != EMULATION_OK) goto reinject; @@ -2355,8 +2113,13 @@ static int gp_interception(struct kvm_vcpu *vcpu) if (!is_guest_mode(vcpu)) return kvm_emulate_instruction(vcpu, EMULTYPE_VMWARE_GP | EMULTYPE_NO_DECODE); - } else + } else { + /* All SVM instructions expect page aligned RAX */ + if (svm->vmcb->save.rax & ~PAGE_MASK) + goto reinject; + return emulate_svm_instr(vcpu, opcode); + } reinject: kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); @@ -3528,6 +3291,21 @@ static void svm_set_irq(struct kvm_vcpu *vcpu) SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; } +static void svm_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, + int trig_mode, int vector) +{ + struct kvm_vcpu *vcpu = apic->vcpu; + + if (svm_deliver_avic_intr(vcpu, vector)) { + kvm_lapic_set_irr(vector, apic); + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); + } else { + trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode, + trig_mode, vector); + } +} + static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3833,6 +3611,11 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) svm_complete_interrupts(vcpu); } +static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu) +{ + return 1; +} + static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) { if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && @@ -3847,7 +3630,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); unsigned long vmcb_pa = svm->current_vmcb->pa; - kvm_guest_enter_irqoff(); + guest_state_enter_irqoff(); if (sev_es_guest(vcpu->kvm)) { __svm_sev_es_vcpu_run(vmcb_pa); @@ -3867,7 +3650,7 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu) vmload(__sme_page_pa(sd->save_area)); } - kvm_guest_exit_irqoff(); + 
guest_state_exit_irqoff(); } static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) @@ -4490,79 +4273,140 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu) } } -static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len) +static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) { bool smep, smap, is_user; unsigned long cr4; + u64 error_code; - /* - * When the guest is an SEV-ES guest, emulation is not possible. - */ + /* Emulation is always possible when KVM has access to all guest state. */ + if (!sev_guest(vcpu->kvm)) + return true; + + /* #UD and #GP should never be intercepted for SEV guests. */ + WARN_ON_ONCE(emul_type & (EMULTYPE_TRAP_UD | + EMULTYPE_TRAP_UD_FORCED | + EMULTYPE_VMWARE_GP)); + + /* + * Emulation is impossible for SEV-ES guests as KVM doesn't have access + * to guest register state. + */ if (sev_es_guest(vcpu->kvm)) return false; + /* + * Emulation is possible if the instruction is already decoded, e.g. + * when completing I/O after returning from userspace. + */ + if (emul_type & EMULTYPE_NO_DECODE) + return true; + + /* + * Emulation is possible for SEV guests if and only if a prefilled + * buffer containing the bytes of the intercepted instruction is + * available. SEV guest memory is encrypted with a guest specific key + * and cannot be decrypted by KVM, i.e. KVM would read ciphertext and + * decode garbage. + * + * Inject #UD if KVM reached this point without an instruction buffer. + * In practice, this path should never be hit by a well-behaved guest, + * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path + * is still theoretically reachable, e.g. via unaccelerated fault-like + * AVIC access, and needs to be handled by KVM to avoid putting the + * guest into an infinite loop. Injecting #UD is somewhat arbitrary, + * but it's the least awful option given lack of insight into the guest. + */ + if (unlikely(!insn)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return false; + } + + /* + * Emulate for SEV guests if the insn buffer is not empty. The buffer + * will be empty if the DecodeAssist microcode cannot fetch bytes for + * the faulting instruction because the code fetch itself faulted, e.g. + * the guest attempted to fetch from emulated MMIO or a guest page + * table used to translate CS:RIP resides in emulated MMIO. + */ + if (likely(insn_len)) + return true; + /* * Detect and workaround Errata 1096 Fam_17h_00_0Fh. * * Errata: - * When CPU raise #NPF on guest data access and vCPU CR4.SMAP=1, it is - * possible that CPU microcode implementing DecodeAssist will fail - * to read bytes of instruction which caused #NPF. In this case, - * GuestIntrBytes field of the VMCB on a VMEXIT will incorrectly - * return 0 instead of the correct guest instruction bytes. + * When CPU raises #NPF on guest data access and vCPU CR4.SMAP=1, it is + * possible that CPU microcode implementing DecodeAssist will fail to + * read guest memory at CS:RIP and vmcb.GuestIntrBytes will incorrectly + * be '0'. This happens because microcode reads CS:RIP using a _data_ + * load uop with CPL=0 privileges. If the load hits a SMAP #PF, ucode + * gives up and does not fill the instruction bytes buffer. * - * This happens because CPU microcode reading instruction bytes - * uses a special opcode which attempts to read data using CPL=0 - * privileges. The microcode reads CS:RIP and if it hits a SMAP - * fault, it gives up and returns no instruction bytes.
+ * As above, KVM reaches this point iff the VM is an SEV guest, the CPU + * supports DecodeAssist, a #NPF was raised, KVM's page fault handler + * triggered emulation (e.g. for MMIO), and the CPU returned 0 in the + * GuestIntrBytes field of the VMCB. * - * Detection: - * We reach here in case CPU supports DecodeAssist, raised #NPF and - * returned 0 in GuestIntrBytes field of the VMCB. - * First, errata can only be triggered in case vCPU CR4.SMAP=1. - * Second, if vCPU CR4.SMEP=1, errata could only be triggered - * in case vCPU CPL==3 (Because otherwise guest would have triggered - * a SMEP fault instead of #NPF). - * Otherwise, vCPU CR4.SMEP=0, errata could be triggered by any vCPU CPL. - * As most guests enable SMAP if they have also enabled SMEP, use above - * logic in order to attempt minimize false-positive of detecting errata - * while still preserving all cases semantic correctness. + * This does _not_ mean that the erratum has been encountered, as the + * DecodeAssist will also fail if the load for CS:RIP hits a legitimate + * #PF, e.g. if the guest attempted to execute from emulated MMIO and + * encountered a reserved/not-present #PF. * - * Workaround: - * To determine what instruction the guest was executing, the hypervisor - * will have to decode the instruction at the instruction pointer. + * To hit the erratum, the following conditions must be true: + * 1. CR4.SMAP=1 (obviously). + * 2. CR4.SMEP=0 || CPL=3. If SMEP=1 and CPL<3, the erratum cannot + * have been hit as the guest would have encountered a SMEP + * violation #PF, not a #NPF. + * 3. The #NPF is not due to a code fetch, in which case failure to + * retrieve the instruction bytes is legitimate (see above). * - * In non SEV guest, hypervisor will be able to read the guest - * memory to decode the instruction pointer when insn_len is zero - * so we return true to indicate that decoding is possible. - * - * But in the SEV guest, the guest memory is encrypted with the - * guest specific key and hypervisor will not be able to decode the - * instruction pointer so we will not able to workaround it. Lets - * print the error and request to kill the guest. + * In addition, don't apply the erratum workaround if the #NPF occurred + * while translating guest page tables (see below). */ - if (likely(!insn || insn_len)) - return true; - - /* - * If RIP is invalid, go ahead with emulation which will cause an - * internal error exit. - */ - if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT)) - return true; + error_code = to_svm(vcpu)->vmcb->control.exit_info_1; + if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK)) + goto resume_guest; cr4 = kvm_read_cr4(vcpu); smep = cr4 & X86_CR4_SMEP; smap = cr4 & X86_CR4_SMAP; is_user = svm_get_cpl(vcpu) == 3; if (smap && (!smep || is_user)) { - if (!sev_guest(vcpu->kvm)) - return true; - pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n"); - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + + /* + * If the fault occurred in userspace, arbitrarily inject #GP + * to avoid killing the guest and to hopefully avoid confusing + * the guest kernel too much, e.g. injecting #PF would not be + * coherent with respect to the guest's page tables. Request + * triple fault if the fault occurred in the kernel as there's + * no fault that KVM can inject without confusing the guest. + * In practice, the triple fault is moot as no sane SEV kernel + * will execute from user memory while also running with SMAP=1.
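[Editor's aside, not part of the patch: the erratum filter spelled out in the comment above reduces to the following sketch. The helper name is hypothetical; the PFERR_* and X86_CR4_* macros are the kernel's own definitions.]

/*
 * Condensed form of the checks svm_can_emulate_instruction() performs once
 * the instruction buffer is known to be empty: returns true iff the #NPF
 * could plausibly be AMD erratum 1096 rather than a legitimate DecodeAssist
 * failure.
 */
static bool could_be_erratum_1096(u64 error_code, unsigned long cr4, bool is_user)
{
	/* Code fetches and guest page-table walks cannot hit the erratum. */
	if (error_code & (PFERR_GUEST_PAGE_MASK | PFERR_FETCH_MASK))
		return false;

	/* CR4.SMAP must be set; with SMEP enabled, only CPL3 can hit it. */
	return (cr4 & X86_CR4_SMAP) &&
	       (!(cr4 & X86_CR4_SMEP) || is_user);
}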
+ */ + if (is_user) + kvm_inject_gp(vcpu, 0); + else + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); } +resume_guest: + /* + * If the erratum was not hit, simply resume the guest and let it fault + * again. While awful, e.g. the vCPU may get stuck in an infinite loop + * if the fault is at CPL=0, it's the lesser of all evils. Exiting to + * userspace will kill the guest, and letting the emulator read garbage + * will yield random behavior and potentially corrupt the guest. + * + * Simply resuming the guest is technically not a violation of the SEV + * architecture. AMD's APM states that all code fetches and page table + * accesses for SEV guest are encrypted, regardless of the C-Bit. The + * APM also states that encrypted accesses to MMIO are "ignored", but + * doesn't explicitly define "ignored", i.e. doing nothing and letting + * the guest spin is technically "ignoring" the access. + */ return false; } @@ -4629,8 +4473,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .prepare_guest_switch = svm_prepare_guest_switch, .vcpu_load = svm_vcpu_load, .vcpu_put = svm_vcpu_put, - .vcpu_blocking = svm_vcpu_blocking, - .vcpu_unblocking = svm_vcpu_unblocking, + .vcpu_blocking = avic_vcpu_blocking, + .vcpu_unblocking = avic_vcpu_unblocking, .update_exception_bitmap = svm_update_exception_bitmap, .get_msr_feature = svm_get_msr_feature, @@ -4662,6 +4506,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .tlb_flush_gva = svm_flush_tlb_gva, .tlb_flush_guest = svm_flush_tlb, + .vcpu_pre_run = svm_vcpu_pre_run, .run = svm_vcpu_run, .handle_exit = handle_exit, .skip_emulated_instruction = skip_emulated_instruction, @@ -4715,7 +4560,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .pmu_ops = &amd_pmu_ops, .nested_ops = &svm_nested_ops, - .deliver_posted_interrupt = svm_deliver_avic_intr, + .deliver_interrupt = svm_deliver_interrupt, .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt, .update_pi_irte = svm_update_pi_irte, .setup_mce = svm_setup_mce, @@ -4742,6 +4587,243 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector, }; +/* + * The default MMIO mask is a single bit (excluding the present bit), + * which could conflict with the memory encryption bit. Check for + * memory encryption support and override the default MMIO mask if + * memory encryption is enabled. + */ +static __init void svm_adjust_mmio_mask(void) +{ + unsigned int enc_bit, mask_bit; + u64 msr, mask; + + /* If there is no memory encryption support, use existing mask */ + if (cpuid_eax(0x80000000) < 0x8000001f) + return; + + /* If memory encryption is not enabled, use existing mask */ + rdmsrl(MSR_AMD64_SYSCFG, msr); + if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT)) + return; + + enc_bit = cpuid_ebx(0x8000001f) & 0x3f; + mask_bit = boot_cpu_data.x86_phys_bits; + + /* Increment the mask bit if it is the same as the encryption bit */ + if (enc_bit == mask_bit) + mask_bit++; + + /* + * If the mask bit location is below 52, then some bits above the + * physical addressing limit will always be reserved, so use the + * rsvd_bits() function to generate the mask. This mask, along with + * the present bit, will be used to generate a page fault with + * PFER.RSV = 1. + * + * If the mask bit location is 52 (or above), then clear the mask. + */ + mask = (mask_bit < 52) ? 
rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0; + + kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK); +} + +static __init void svm_set_cpu_caps(void) +{ + kvm_set_cpu_caps(); + + supported_xss = 0; + + /* CPUID 0x80000001 and 0x8000000A (SVM features) */ + if (nested) { + kvm_cpu_cap_set(X86_FEATURE_SVM); + + if (nrips) + kvm_cpu_cap_set(X86_FEATURE_NRIPS); + + if (npt_enabled) + kvm_cpu_cap_set(X86_FEATURE_NPT); + + if (tsc_scaling) + kvm_cpu_cap_set(X86_FEATURE_TSCRATEMSR); + + /* Nested VM can receive #VMEXIT instead of triggering #GP */ + kvm_cpu_cap_set(X86_FEATURE_SVME_ADDR_CHK); + } + + /* CPUID 0x80000008 */ + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) || + boot_cpu_has(X86_FEATURE_AMD_SSBD)) + kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD); + + /* AMD PMU PERFCTR_CORE CPUID */ + if (enable_pmu && boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) + kvm_cpu_cap_set(X86_FEATURE_PERFCTR_CORE); + + /* CPUID 0x8000001F (SME/SEV features) */ + sev_set_cpu_caps(); +} + +static __init int svm_hardware_setup(void) +{ + int cpu; + struct page *iopm_pages; + void *iopm_va; + int r; + unsigned int order = get_order(IOPM_SIZE); + + /* + * NX is required for shadow paging and for NPT if the NX huge pages + * mitigation is enabled. + */ + if (!boot_cpu_has(X86_FEATURE_NX)) { + pr_err_ratelimited("NX (Execute Disable) not supported\n"); + return -EOPNOTSUPP; + } + kvm_enable_efer_bits(EFER_NX); + + iopm_pages = alloc_pages(GFP_KERNEL, order); + + if (!iopm_pages) + return -ENOMEM; + + iopm_va = page_address(iopm_pages); + memset(iopm_va, 0xff, PAGE_SIZE * (1 << order)); + iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT; + + init_msrpm_offsets(); + + supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); + + if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) + kvm_enable_efer_bits(EFER_FFXSR); + + if (tsc_scaling) { + if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR)) { + tsc_scaling = false; + } else { + pr_info("TSC scaling supported\n"); + kvm_has_tsc_control = true; + kvm_max_tsc_scaling_ratio = TSC_RATIO_MAX; + kvm_tsc_scaling_ratio_frac_bits = 32; + } + } + + tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX); + + /* Check for pause filtering support */ + if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) { + pause_filter_count = 0; + pause_filter_thresh = 0; + } else if (!boot_cpu_has(X86_FEATURE_PFTHRESHOLD)) { + pause_filter_thresh = 0; + } + + if (nested) { + printk(KERN_INFO "kvm: Nested Virtualization enabled\n"); + kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE); + } + + /* + * KVM's MMU doesn't support using 2-level paging for itself, and thus + * NPT isn't supported if the host is using 2-level paging since host + * CR4 is unchanged on VMRUN. + */ + if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE)) + npt_enabled = false; + + if (!boot_cpu_has(X86_FEATURE_NPT)) + npt_enabled = false; + + /* Force VM NPT level equal to the host's paging level */ + kvm_configure_mmu(npt_enabled, get_npt_level(), + get_npt_level(), PG_LEVEL_1G); + pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis"); + + /* Note, SEV setup consumes npt_enabled. 
*/ + sev_hardware_setup(); + + svm_hv_hardware_setup(); + + svm_adjust_mmio_mask(); + + for_each_possible_cpu(cpu) { + r = svm_cpu_init(cpu); + if (r) + goto err; + } + + if (nrips) { + if (!boot_cpu_has(X86_FEATURE_NRIPS)) + nrips = false; + } + + enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC); + + if (enable_apicv) { + pr_info("AVIC enabled\n"); + + amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); + } else { + svm_x86_ops.vcpu_blocking = NULL; + svm_x86_ops.vcpu_unblocking = NULL; + } + + if (vls) { + if (!npt_enabled || + !boot_cpu_has(X86_FEATURE_V_VMSAVE_VMLOAD) || + !IS_ENABLED(CONFIG_X86_64)) { + vls = false; + } else { + pr_info("Virtual VMLOAD VMSAVE supported\n"); + } + } + + if (boot_cpu_has(X86_FEATURE_SVME_ADDR_CHK)) + svm_gp_erratum_intercept = false; + + if (vgif) { + if (!boot_cpu_has(X86_FEATURE_VGIF)) + vgif = false; + else + pr_info("Virtual GIF supported\n"); + } + + if (lbrv) { + if (!boot_cpu_has(X86_FEATURE_LBRV)) + lbrv = false; + else + pr_info("LBR virtualization supported\n"); + } + + if (!enable_pmu) + pr_info("PMU virtualization is disabled\n"); + + svm_set_cpu_caps(); + + /* + * It seems that on AMD processors PTE's accessed bit is + * being set by the CPU hardware before the NPF vmexit. + * This is not expected behaviour and our tests fail because + * of it. + * A workaround here is to disable support for + * GUEST_MAXPHYADDR < HOST_MAXPHYADDR if NPT is enabled. + * In this case userspace can know if there is support using + * KVM_CAP_SMALLER_MAXPHYADDR extension and decide how to handle + * it + * If future AMD CPU models change the behaviour described above, + * this variable can be changed accordingly + */ + allow_smaller_maxphyaddr = !npt_enabled; + + return 0; + +err: + svm_hardware_teardown(); + return r; +} + + static struct kvm_x86_init_ops svm_init_ops __initdata = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 9f153c59f2c8d5ba1442ee8c9e855589e8a39a65..73525353e4247b8dccbdc3901047fe238ae998cd 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -32,7 +32,6 @@ extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; extern bool npt_enabled; extern bool intercept_smi; -extern bool pmu; /* * Clean bits in VMCB. 
@@ -226,7 +225,6 @@ struct vcpu_svm { u32 dfr_reg; struct page *avic_backing_page; u64 *avic_physical_id_cache; - bool avic_is_running; /* * Per-vcpu list of struct amd_svm_iommu_ir: @@ -306,11 +304,6 @@ static inline void vmcb_mark_all_clean(struct vmcb *vmcb) & ~VMCB_ALWAYS_DIRTY_MASK; } -static inline bool vmcb_is_clean(struct vmcb *vmcb, int bit) -{ - return (vmcb->control.clean & (1 << bit)); -} - static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit) { vmcb->control.clean &= ~(1 << bit); @@ -527,7 +520,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm) int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun); -void svm_leave_nested(struct vcpu_svm *svm); +void svm_leave_nested(struct kvm_vcpu *vcpu); void svm_free_nested(struct vcpu_svm *svm); int svm_allocate_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct kvm_vcpu *vcpu); @@ -574,17 +567,6 @@ extern struct kvm_x86_nested_ops svm_nested_ops; #define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL -static inline bool avic_vcpu_is_running(struct kvm_vcpu *vcpu) -{ - struct vcpu_svm *svm = to_svm(vcpu); - u64 *entry = svm->avic_physical_id_cache; - - if (!entry) - return false; - - return (READ_ONCE(*entry) & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK); -} - int avic_ga_log_notifier(u32 ga_tag); void avic_vm_destroy(struct kvm *kvm); int avic_vm_init(struct kvm *kvm); @@ -605,8 +587,8 @@ int svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec); bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu); int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); -void svm_vcpu_blocking(struct kvm_vcpu *vcpu); -void svm_vcpu_unblocking(struct kvm_vcpu *vcpu); +void avic_vcpu_blocking(struct kvm_vcpu *vcpu); +void avic_vcpu_unblocking(struct kvm_vcpu *vcpu); /* sev.c */ diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index c53b8bf8d01386d716ebb35c470c1619e09826a7..489ca56212c617c0225ea226fea5530ea0a1edc9 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -46,6 +46,9 @@ static inline void svm_hv_init_vmcb(struct vmcb *vmcb) if (npt_enabled && ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) hve->hv_enlightenments_control.enlightened_npt_tlb = 1; + + if (ms_hyperv.nested_features & HV_X64_NESTED_MSR_BITMAP) + hve->hv_enlightenments_control.msr_bitmap = 1; } static inline void svm_hv_hardware_setup(void) @@ -83,14 +86,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments( struct hv_enlightenments *hve = (struct hv_enlightenments *)vmcb->control.reserved_sw; - /* - * vmcb can be NULL if called during early vcpu init. - * And its okay not to mark vmcb dirty during vcpu init - * as we mark it dirty unconditionally towards end of vcpu - * init phase. 
- */ - if (vmcb_is_clean(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS) && - hve->hv_enlightenments_control.msr_bitmap) + if (hve->hv_enlightenments_control.msr_bitmap) vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS); } diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index c8029b7845b6fbd7197028022b2937988735559d..3f430e2183753a5d594d8308a3d73477d921ebdc 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -5,6 +5,7 @@ #include #include "lapic.h" +#include "x86.h" extern bool __read_mostly enable_vpid; extern bool __read_mostly flexpriority_enabled; @@ -53,7 +54,6 @@ struct nested_vmx_msrs { struct vmcs_config { int size; - int order; u32 basic_cap; u32 revision_id; u32 pin_based_exec_ctrl; @@ -389,6 +389,9 @@ static inline u64 vmx_get_perf_capabilities(void) { u64 perf_cap = 0; + if (!enable_pmu) + return perf_cap; + if (boot_cpu_has(X86_FEATURE_PDCM)) rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_cap); diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index ba6f99f584ac33e1f0053bf0200cb4f6668aba98..87e3dc10edf40a4dbbfdfc356553b531fa63b40b 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -12,8 +12,6 @@ DEFINE_STATIC_KEY_FALSE(enable_evmcs); -#if IS_ENABLED(CONFIG_HYPERV) - #define EVMCS1_OFFSET(x) offsetof(struct hv_enlightened_vmcs, x) #define EVMCS1_FIELD(number, name, clean_field)[ROL16(number, 6)] = \ {EVMCS1_OFFSET(name), clean_field} @@ -296,6 +294,7 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = { }; const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1); +#if IS_ENABLED(CONFIG_HYPERV) __init void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) { vmcs_conf->pin_based_exec_ctrl &= ~EVMCS1_UNSUPPORTED_PINCTRL; @@ -362,6 +361,7 @@ void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata) case MSR_IA32_VMX_PROCBASED_CTLS2: ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC; break; + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: case MSR_IA32_VMX_PINBASED_CTLS: ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL; break; diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index 16731d2cf231b5dc4da33c80093a044afb1227af..8d70f9aea94bc9edb90c1c874ef43fa5f0fd9e80 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -59,12 +59,12 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs); SECONDARY_EXEC_SHADOW_VMCS | \ SECONDARY_EXEC_TSC_SCALING | \ SECONDARY_EXEC_PAUSE_LOOP_EXITING) -#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) +#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL \ + (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \ + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) #define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) #define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING) -#if IS_ENABLED(CONFIG_HYPERV) - struct evmcs_field { u16 offset; u16 clean_field; @@ -73,26 +73,56 @@ struct evmcs_field { extern const struct evmcs_field vmcs_field_to_evmcs_1[]; extern const unsigned int nr_evmcs_1_fields; -static __always_inline int get_evmcs_offset(unsigned long field, - u16 *clean_field) +static __always_inline int evmcs_field_offset(unsigned long field, + u16 *clean_field) { unsigned int index = ROL16(field, 6); const struct evmcs_field *evmcs_field; - if (unlikely(index >= nr_evmcs_1_fields)) { - WARN_ONCE(1, "KVM: accessing unsupported EVMCS field %lx\n", - field); + if (unlikely(index >= nr_evmcs_1_fields)) return -ENOENT; - } evmcs_field = &vmcs_field_to_evmcs_1[index]; + /* + * Use offset=0 to detect holes in eVMCS. 
This offset belongs to + * 'revision_id' but this field has no encoding and is supposed to + * be accessed directly. + */ + if (unlikely(!evmcs_field->offset)) + return -ENOENT; + if (clean_field) *clean_field = evmcs_field->clean_field; return evmcs_field->offset; } +static inline u64 evmcs_read_any(struct hv_enlightened_vmcs *evmcs, + unsigned long field, u16 offset) +{ + /* + * vmcs12_read_any() doesn't care whether the supplied structure + * is 'struct vmcs12' or 'struct hv_enlightened_vmcs' as it takes + * the exact offset of the required field, use it for convenience + * here. + */ + return vmcs12_read_any((void *)evmcs, field, offset); +} + +#if IS_ENABLED(CONFIG_HYPERV) + +static __always_inline int get_evmcs_offset(unsigned long field, + u16 *clean_field) +{ + int offset = evmcs_field_offset(field, clean_field); + + WARN_ONCE(offset < 0, "KVM: accessing unsupported EVMCS field %lx\n", + field); + + return offset; +} + static __always_inline void evmcs_write64(unsigned long field, u64 value) { u16 clean_field; diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f235f77cbc037a1f1dc819ab5685dab5d34622a2..ba34e94049c79127756de6cfeed4159713033af0 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7,6 +7,7 @@ #include #include "cpuid.h" +#include "evmcs.h" #include "hyperv.h" #include "mmu.h" #include "nested.h" @@ -4851,18 +4852,20 @@ static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu) struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs; /* - * We should allocate a shadow vmcs for vmcs01 only when L1 - * executes VMXON and free it when L1 executes VMXOFF. - * As it is invalid to execute VMXON twice, we shouldn't reach - * here when vmcs01 already have an allocated shadow vmcs. + * KVM allocates a shadow VMCS only when L1 executes VMXON and frees it + * when L1 executes VMXOFF or the vCPU is forced out of nested + * operation. VMXON faults if the CPU is already post-VMXON, so it + * should be impossible to already have an allocated shadow VMCS. KVM + * doesn't support virtualization of VMCS shadowing, so vmcs01 should + * always be the loaded VMCS. */ - WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs); + if (WARN_ON(loaded_vmcs != &vmx->vmcs01 || loaded_vmcs->shadow_vmcs)) + return loaded_vmcs->shadow_vmcs; + + loaded_vmcs->shadow_vmcs = alloc_vmcs(true); + if (loaded_vmcs->shadow_vmcs) + vmcs_clear(loaded_vmcs->shadow_vmcs); - if (!loaded_vmcs->shadow_vmcs) { - loaded_vmcs->shadow_vmcs = alloc_vmcs(true); - if (loaded_vmcs->shadow_vmcs) - vmcs_clear(loaded_vmcs->shadow_vmcs); - } return loaded_vmcs->shadow_vmcs; } @@ -5099,27 +5102,49 @@ static int handle_vmread(struct kvm_vcpu *vcpu) if (!nested_vmx_check_permission(vcpu)) return 1; - /* - * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA, - * any VMREAD sets the ALU flags for VMfailInvalid. - */ - if (vmx->nested.current_vmptr == INVALID_GPA || - (is_guest_mode(vcpu) && - get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA)) - return nested_vmx_failInvalid(vcpu); - /* Decode instruction info and find the field to read */ field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf)); - offset = vmcs_field_to_offset(field); - if (offset < 0) - return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); + if (!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) { + /* + * In VMX non-root operation, when the VMCS-link pointer is INVALID_GPA, + * any VMREAD sets the ALU flags for VMfailInvalid. 
+ */ + if (vmx->nested.current_vmptr == INVALID_GPA || + (is_guest_mode(vcpu) && + get_vmcs12(vcpu)->vmcs_link_pointer == INVALID_GPA)) + return nested_vmx_failInvalid(vcpu); - if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field)) - copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); + offset = get_vmcs12_field_offset(field); + if (offset < 0) + return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); + + if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field)) + copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12); - /* Read the field, zero-extended to a u64 value */ - value = vmcs12_read_any(vmcs12, field, offset); + /* Read the field, zero-extended to a u64 value */ + value = vmcs12_read_any(vmcs12, field, offset); + } else { + /* + * Hyper-V TLFS (as of 6.0b) explicitly states that, while an + * enlightened VMCS is active, VMREAD/VMWRITE instructions are + * unsupported. Unfortunately, certain versions of Windows 11 + * don't comply with this requirement, which is not enforced in + * genuine Hyper-V. Allow VMREAD from an enlightened VMCS as a + * workaround, as misbehaving guests will panic on VM-Fail. + * Note, enlightened VMCS is incompatible with shadow VMCS so + * all VMREADs from L2 should go to L1. + */ + if (WARN_ON_ONCE(is_guest_mode(vcpu))) + return nested_vmx_failInvalid(vcpu); + + offset = evmcs_field_offset(field, NULL); + if (offset < 0) + return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); + + /* Read the field, zero-extended to a u64 value */ + value = evmcs_read_any(vmx->nested.hv_evmcs, field, offset); + } /* * Now copy part of this value to register or memory, as requested. @@ -5214,7 +5239,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf)); - offset = vmcs_field_to_offset(field); + offset = get_vmcs12_field_offset(field); if (offset < 0) return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT); @@ -6462,7 +6487,7 @@ static u64 nested_vmx_calc_vmcs_enum_msr(void) max_idx = 0; for (i = 0; i < nr_vmcs12_fields; i++) { /* The vmcs12 table is very, very sparsely populated.
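[Editor's aside: an illustrative, self-contained sketch of the field-offset lookup shared by get_vmcs12_field_offset() and evmcs_field_offset() above. The helper name is hypothetical, and it omits the array_index_nospec() hardening the real vmcs12 variant applies.]

#include <stddef.h>
#include <stdint.h>

/*
 * The 16-bit VMCS field encoding is rotated left by 6 (ROL16) to form an
 * index into a sparse offset table; a stored offset of 0 marks a hole,
 * since offset 0 corresponds to 'revision_id', which has no encoding.
 */
static int lookup_field_offset(const uint16_t *table, size_t nr_fields,
			       unsigned long field)
{
	uint16_t enc = (uint16_t)field;
	uint16_t index = (uint16_t)((enc << 6) | (enc >> 10)); /* ROL16(enc, 6) */

	if (index >= nr_fields || !table[index])
		return -1; /* -ENOENT upstream: unsupported field or hole */

	return table[index];
}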
*/ - if (!vmcs_field_to_offset_table[i]) + if (!vmcs12_field_offsets[i]) continue; idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i)); @@ -6771,6 +6796,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) } struct kvm_x86_nested_ops vmx_nested_ops = { + .leave_nested = vmx_leave_nested, .check_events = vmx_check_nested_events, .hv_timer_pending = nested_vmx_preemption_timer_pending, .triple_fault = nested_vmx_triple_fault, diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 5e0ac57d6d1b84f699af816e08d1936bf5d767b6..466d18fc0c5da38cec6822b7f3af8d5439c72de9 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -21,7 +21,6 @@ #define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0) static struct kvm_event_hw_type_mapping intel_arch_events[] = { - /* Index must match CPUID 0x0A.EBX bit vector */ [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES }, [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS }, [2] = { 0x3c, 0x01, PERF_COUNT_HW_BUS_CYCLES }, @@ -29,6 +28,7 @@ static struct kvm_event_hw_type_mapping intel_arch_events[] = { [4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES }, [5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, [6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, + /* The above index must match CPUID 0x0A.EBX bit vector */ [7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES }, }; @@ -75,11 +75,17 @@ static unsigned int intel_pmc_perf_hw_id(struct kvm_pmc *pmc) u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; int i; - for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) - if (intel_arch_events[i].eventsel == event_select && - intel_arch_events[i].unit_mask == unit_mask && - (pmc_is_fixed(pmc) || pmu->available_event_types & (1 << i))) - break; + for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) { + if (intel_arch_events[i].eventsel != event_select || + intel_arch_events[i].unit_mask != unit_mask) + continue; + + /* disable event that reported as not present by cpuid */ + if ((i < 7) && !(pmu->available_event_types & (1 << i))) + return PERF_COUNT_HW_MAX + 1; + + break; + } if (i == ARRAY_SIZE(intel_arch_events)) return PERF_COUNT_HW_MAX; @@ -481,7 +487,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->reserved_bits = 0xffffffff00200000ull; entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); - if (!entry) + if (!entry || !enable_pmu) return; eax.full = entry->eax; edx.full = entry->edx; diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index 88c53c521094a5d96066b6cb91d4db1627baf02e..aa1fe9085d77996405fb48629a5a59d030654f24 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -19,7 +19,7 @@ * wake the target vCPUs. vCPUs are removed from the list and the notification * vector is reset when the vCPU is scheduled in. */ -static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); +static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu); /* * Protect the per-CPU list with a per-CPU spinlock to handle task migration. * When a blocking vCPU is awakened _and_ migrated to a different pCPU, the @@ -27,7 +27,7 @@ static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); * CPU. IRQs must be disabled when taking this lock, otherwise deadlock will * occur if a wakeup IRQ arrives and attempts to acquire the lock. 
*/ -static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); +static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock); static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) { @@ -51,7 +51,9 @@ static int pi_try_set_control(struct pi_desc *pi_desc, u64 old, u64 new) void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) { struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + struct vcpu_vmx *vmx = to_vmx(vcpu); struct pi_desc old, new; + unsigned long flags; unsigned int dest; /* @@ -62,23 +64,34 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) if (!enable_apicv || !lapic_in_kernel(vcpu)) return; - /* Nothing to do if PI.SN and PI.NDST both have the desired value. */ - if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu) + /* + * If the vCPU wasn't on the wakeup list and wasn't migrated, then the + * full update can be skipped as neither the vector nor the destination + * needs to be changed. + */ + if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR && vcpu->cpu == cpu) { + /* + * Clear SN if it was set due to being preempted. Again, do + * this even if there is no assigned device for simplicity. + */ + if (pi_test_and_clear_sn(pi_desc)) + goto after_clear_sn; return; + } + + local_irq_save(flags); /* - * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change - * PI.NDST: pi_post_block is the one expected to change PID.NDST and the - * wakeup handler expects the vCPU to be on the blocked_vcpu_list that - * matches PI.NDST. Otherwise, a vcpu may not be able to be woken up - * correctly. + * If the vCPU was waiting for wakeup, remove the vCPU from the wakeup + * list of the _previous_ pCPU, which will not be the same as the + * current pCPU if the task was migrated. */ - if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR || vcpu->cpu == cpu) { - pi_clear_sn(pi_desc); - goto after_clear_sn; + if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) { + raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); + list_del(&vmx->pi_wakeup_list); + raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); } - /* The full case. Set the new destination and clear SN. */ dest = cpu_physical_id(cpu); if (!x2apic_mode) dest = (dest << 8) & 0xFF00; @@ -86,10 +99,22 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) do { old.control = new.control = READ_ONCE(pi_desc->control); + /* + * Clear SN (as above) and refresh the destination APIC ID to + * handle task migration (@cpu != vcpu->cpu). + */ new.ndst = dest; new.sn = 0; + + /* + * Restore the notification vector; in the blocking case, the + * descriptor was modified on "put" to use the wakeup vector. + */ + new.nv = POSTED_INTR_VECTOR; } while (pi_try_set_control(pi_desc, old.control, new.control)); + local_irq_restore(flags); + after_clear_sn: /* @@ -111,83 +136,25 @@ static bool vmx_can_use_vtd_pi(struct kvm *kvm) irq_remapping_cap(IRQ_POSTING_CAP); } -void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) -{ - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - - if (!vmx_can_use_vtd_pi(vcpu->kvm)) - return; - - /* Set SN when the vCPU is preempted */ - if (vcpu->preempted) - pi_set_sn(pi_desc); -} - -static void __pi_post_block(struct kvm_vcpu *vcpu) -{ - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - struct pi_desc old, new; - unsigned int dest; - - /* - * Remove the vCPU from the wakeup list of the _previous_ pCPU, which - * will not be the same as the current pCPU if the task was migrated. 
- */ - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - list_del(&vcpu->blocked_vcpu_list); - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); - - dest = cpu_physical_id(vcpu->cpu); - if (!x2apic_mode) - dest = (dest << 8) & 0xFF00; - - WARN(pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR, - "Wakeup handler not enabled while the vCPU was blocking"); - - do { - old.control = new.control = READ_ONCE(pi_desc->control); - - new.ndst = dest; - - /* set 'NV' to 'notification vector' */ - new.nv = POSTED_INTR_VECTOR; - } while (pi_try_set_control(pi_desc, old.control, new.control)); - - vcpu->pre_pcpu = -1; -} - /* - * This routine does the following things for vCPU which is going - * to be blocked if VT-d PI is enabled. - * - Store the vCPU to the wakeup list, so when interrupts happen - * we can find the right vCPU to wake up. - * - Change the Posted-interrupt descriptor as below: - * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR - * - If 'ON' is set during this process, which means at least one - * interrupt is posted for this vCPU, we cannot block it, in - * this case, return 1, otherwise, return 0. - * + * Put the vCPU on this pCPU's list of vCPUs that needs to be awakened and set + * WAKEUP as the notification vector in the PI descriptor. */ -int pi_pre_block(struct kvm_vcpu *vcpu) +static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu) { - struct pi_desc old, new; struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct pi_desc old, new; unsigned long flags; - if (!vmx_can_use_vtd_pi(vcpu->kvm) || - vmx_interrupt_blocked(vcpu)) - return 0; - local_irq_save(flags); - vcpu->pre_pcpu = vcpu->cpu; - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->cpu)); - list_add_tail(&vcpu->blocked_vcpu_list, - &per_cpu(blocked_vcpu_on_cpu, vcpu->cpu)); - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->cpu)); + raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); + list_add_tail(&vmx->pi_wakeup_list, + &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu)); + raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu)); - WARN(pi_desc->sn == 1, - "Posted Interrupt Suppress Notification set before blocking"); + WARN(pi_desc->sn, "PI descriptor SN field set before blocking"); do { old.control = new.control = READ_ONCE(pi_desc->control); @@ -196,24 +163,37 @@ int pi_pre_block(struct kvm_vcpu *vcpu) new.nv = POSTED_INTR_WAKEUP_VECTOR; } while (pi_try_set_control(pi_desc, old.control, new.control)); - /* We should not block the vCPU if an interrupt is posted for it. */ - if (pi_test_on(pi_desc)) - __pi_post_block(vcpu); + /* + * Send a wakeup IPI to this CPU if an interrupt may have been posted + * before the notification vector was updated, in which case the IRQ + * will arrive on the non-wakeup vector. An IPI is needed as calling + * try_to_wake_up() from ->sched_out() isn't allowed (IRQs are not + * enabled until it is safe to call try_to_wake_up() on the task being + * scheduled out). 
+ */ + if (pi_test_on(&new)) + apic->send_IPI_self(POSTED_INTR_WAKEUP_VECTOR); local_irq_restore(flags); - return (vcpu->pre_pcpu == -1); } -void pi_post_block(struct kvm_vcpu *vcpu) +void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) { - unsigned long flags; + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - if (vcpu->pre_pcpu == -1) + if (!vmx_can_use_vtd_pi(vcpu->kvm)) return; - local_irq_save(flags); - __pi_post_block(vcpu); - local_irq_restore(flags); + if (kvm_vcpu_is_blocking(vcpu) && !vmx_interrupt_blocked(vcpu)) + pi_enable_wakeup_handler(vcpu); + + /* + * Set SN when the vCPU is preempted. Note, the vCPU can both be seen + * as blocking and preempted, e.g. if it's preempted between setting + * its wait state and manually scheduling out. + */ + if (vcpu->preempted) + pi_set_sn(pi_desc); } /* @@ -221,24 +201,23 @@ void pi_post_block(struct kvm_vcpu *vcpu) */ void pi_wakeup_handler(void) { - struct kvm_vcpu *vcpu; int cpu = smp_processor_id(); + struct vcpu_vmx *vmx; - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); - list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu), - blocked_vcpu_list) { - struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu)); + list_for_each_entry(vmx, &per_cpu(wakeup_vcpus_on_cpu, cpu), + pi_wakeup_list) { - if (pi_test_on(pi_desc)) - kvm_vcpu_kick(vcpu); + if (pi_test_on(&vmx->pi_desc)) + kvm_vcpu_wake_up(&vmx->vcpu); } - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu)); } void __init pi_init_cpu(int cpu) { - INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); - spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + INIT_LIST_HEAD(&per_cpu(wakeup_vcpus_on_cpu, cpu)); + raw_spin_lock_init(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu)); } bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu) @@ -254,7 +233,7 @@ bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu) * Bail out of the block loop if the VM has an assigned * device, but the blocking vCPU didn't reconfigure the * PI.NV to the wakeup vector, i.e. the assigned device - * came along after the initial check in pi_pre_block(). + * came along after the initial check in vmx_vcpu_pi_put(). 
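[Editor's aside: a minimal userspace model of the pi_try_set_control() retry pattern used throughout this file. The bit positions mirror the posted-interrupt descriptor's control word (ON bit 0, SN bit 1, NV bits 23:16), but the types and helper are illustrative only.]

#include <stdatomic.h>
#include <stdint.h>

#define PI_ON       (1ull << 0)     /* outstanding notification */
#define PI_SN       (1ull << 1)     /* suppress notification */
#define PI_NV_SHIFT 16              /* notification vector, bits 23:16 */

/*
 * Mirror of the pi_try_set_control() loop: re-read the control word and
 * retry the CAS until it sticks, so concurrent hardware updates to ON are
 * never lost while software swaps in the wakeup vector before blocking.
 */
static uint64_t pi_switch_to_wakeup_vector(_Atomic uint64_t *control,
					   uint8_t wakeup_nv)
{
	uint64_t old = atomic_load(control), new;

	do {
		new = old & ~(0xffull << PI_NV_SHIFT);
		new |= (uint64_t)wakeup_nv << PI_NV_SHIFT;
	} while (!atomic_compare_exchange_weak(control, &old, new));

	return new;	/* caller checks PI_ON to send itself a wakeup IPI */
}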
*/ void vmx_pi_start_assignment(struct kvm *kvm) { diff --git a/arch/x86/kvm/vmx/posted_intr.h b/arch/x86/kvm/vmx/posted_intr.h index 36ae035f14aa75b0db40281509fc780fed9e54f7..eb14e76b84efe6e957484a16fab351b02869ce70 100644 --- a/arch/x86/kvm/vmx/posted_intr.h +++ b/arch/x86/kvm/vmx/posted_intr.h @@ -40,6 +40,12 @@ static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc) (unsigned long *)&pi_desc->control); } +static inline bool pi_test_and_clear_sn(struct pi_desc *pi_desc) +{ + return test_and_clear_bit(POSTED_INTR_SN, + (unsigned long *)&pi_desc->control); +} + static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc) { return test_and_set_bit(vector, (unsigned long *)pi_desc->pir); @@ -88,8 +94,6 @@ static inline bool pi_test_sn(struct pi_desc *pi_desc) void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu); void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu); -int pi_pre_block(struct kvm_vcpu *vcpu); -void pi_post_block(struct kvm_vcpu *vcpu); void pi_wakeup_handler(void); void __init pi_init_cpu(int cpu); bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c index cab6ba7a5005e1510daf749335066a2c16bbda9b..2251b60920f81535131e9e973cc5e262b87f528a 100644 --- a/arch/x86/kvm/vmx/vmcs12.c +++ b/arch/x86/kvm/vmx/vmcs12.c @@ -8,7 +8,7 @@ FIELD(number, name), \ [ROL16(number##_HIGH, 6)] = VMCS12_OFFSET(name) + sizeof(u32) -const unsigned short vmcs_field_to_offset_table[] = { +const unsigned short vmcs12_field_offsets[] = { FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id), FIELD(POSTED_INTR_NV, posted_intr_nv), FIELD(GUEST_ES_SELECTOR, guest_es_selector), @@ -151,4 +151,4 @@ const unsigned short vmcs_field_to_offset_table[] = { FIELD(HOST_RSP, host_rsp), FIELD(HOST_RIP, host_rip), }; -const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs_field_to_offset_table); +const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets); diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h index 2a45f026ee11dfbd8f39989e69828fe1bc79f2c8..746129ddd5ae02b0148130122728fd490a87ff91 100644 --- a/arch/x86/kvm/vmx/vmcs12.h +++ b/arch/x86/kvm/vmx/vmcs12.h @@ -361,10 +361,10 @@ static inline void vmx_check_vmcs12_offsets(void) CHECK_OFFSET(guest_pml_index, 996); } -extern const unsigned short vmcs_field_to_offset_table[]; +extern const unsigned short vmcs12_field_offsets[]; extern const unsigned int nr_vmcs12_fields; -static inline short vmcs_field_to_offset(unsigned long field) +static inline short get_vmcs12_field_offset(unsigned long field) { unsigned short offset; unsigned int index; @@ -377,7 +377,7 @@ static inline short vmcs_field_to_offset(unsigned long field) return -ENOENT; index = array_index_nospec(index, nr_vmcs12_fields); - offset = vmcs_field_to_offset_table[index]; + offset = vmcs12_field_offsets[index]; if (offset == 0) return -ENOENT; return offset; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 1b2e9d8c5cc9b0c0743ec034156869d83a9a1f0f..6c27bd0c89e1e613782c85637634e6215928ac24 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1487,11 +1487,12 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) return 0; } -static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int insn_len) +static bool vmx_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) { /* * Emulation of instructions in SGX enclaves is impossible as RIP does - * not point tthe failing instruction, and even if it did, the code 
+ * not point at the failing instruction, and even if it did, the code * stream is inaccessible. Inject #UD instead of exiting to userspace * so that guest userspace can't DoS the guest simply by triggering * emulation (enclaves are CPL3 only). @@ -2603,7 +2604,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf, return -EIO; vmcs_conf->size = vmx_msr_high & 0x1fff; - vmcs_conf->order = get_order(vmcs_conf->size); vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff; vmcs_conf->revision_id = vmx_msr_low; @@ -2628,7 +2628,7 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) struct page *pages; struct vmcs *vmcs; - pages = __alloc_pages_node(node, flags, vmcs_config.order); + pages = __alloc_pages_node(node, flags, 0); if (!pages) return NULL; vmcs = page_address(pages); @@ -2647,7 +2647,7 @@ struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags) void free_vmcs(struct vmcs *vmcs) { - free_pages((unsigned long)vmcs, vmcs_config.order); + free_page((unsigned long)vmcs); } /* @@ -3931,12 +3931,10 @@ static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu) pt_update_intercept_for_msr(vcpu); } -static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, - bool nested) +static inline void kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, + int pi_vec) { #ifdef CONFIG_SMP - int pi_vec = nested ? POSTED_INTR_NESTED_VECTOR : POSTED_INTR_VECTOR; - if (vcpu->mode == IN_GUEST_MODE) { /* * The vector of interrupt to be delivered to vcpu had @@ -3964,10 +3962,15 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu, */ apic->send_IPI_mask(get_cpu_mask(vcpu->cpu), pi_vec); - return true; + return; } #endif - return false; + /* + * The vCPU isn't in the guest; wake the vCPU in case it is blocking, + * otherwise do nothing as KVM will grab the highest priority pending + * IRQ via ->sync_pir_to_irr() in vcpu_enter_guest(). + */ + kvm_vcpu_wake_up(vcpu); } static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, @@ -3997,8 +4000,7 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu, smp_mb__after_atomic(); /* the PIR and ON have been set by L1. */ - if (!kvm_vcpu_trigger_posted_interrupt(vcpu, true)) - kvm_vcpu_kick(vcpu); + kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_NESTED_VECTOR); return 0; } return -1; @@ -4035,12 +4037,25 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) * guaranteed to see PID.ON=1 and sync the PIR to IRR if triggering a * posted interrupt "fails" because vcpu->mode != IN_GUEST_MODE. */ - if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) - kvm_vcpu_kick(vcpu); - + kvm_vcpu_trigger_posted_interrupt(vcpu, POSTED_INTR_VECTOR); return 0; } +static void vmx_deliver_interrupt(struct kvm_lapic *apic, int delivery_mode, + int trig_mode, int vector) +{ + struct kvm_vcpu *vcpu = apic->vcpu; + + if (vmx_deliver_posted_interrupt(vcpu, vector)) { + kvm_lapic_set_irr(vector, apic); + kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_vcpu_kick(vcpu); + } else { + trace_kvm_apicv_accept_irq(vcpu->vcpu_id, delivery_mode, + trig_mode, vector); + } +} + /* * Set up the vmcs's constant host-state fields, i.e., host-state fields that * will not change in the lifetime of the guest. @@ -4094,10 +4109,14 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx) vmcs_write32(HOST_IA32_SYSENTER_CS, low32); /* - * If 32-bit syscall is enabled, vmx_vcpu_load_vcms rewrites - * HOST_IA32_SYSENTER_ESP. 
+ * SYSENTER is used for 32-bit system calls on either 32-bit or + * 64-bit kernels. It is always zero if neither is allowed, otherwise + * vmx_vcpu_load_vmcs loads it with the per-CPU entry stack (and may + * have already done so!). */ - vmcs_writel(HOST_IA32_SYSENTER_ESP, 0); + if (!IS_ENABLED(CONFIG_IA32_EMULATION) && !IS_ENABLED(CONFIG_X86_32)) + vmcs_writel(HOST_IA32_SYSENTER_ESP, 0); + rdmsrl(MSR_IA32_SYSENTER_EIP, tmpl); vmcs_writel(HOST_IA32_SYSENTER_EIP, tmpl); /* 22.2.3 */ @@ -4901,8 +4920,33 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) dr6 = vmx_get_exit_qual(vcpu); if (!(vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { + /* + * If the #DB was due to ICEBP, a.k.a. INT1, skip the + * instruction. ICEBP generates a trap-like #DB, but + * despite its interception control being tied to #DB, + * is an instruction intercept, i.e. the VM-Exit occurs + * on the ICEBP itself. Note, skipping ICEBP also + * clears STI and MOVSS blocking. + * + * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS + * if single-step is enabled in RFLAGS and STI or MOVSS + * blocking is active, as the CPU doesn't set the bit + * on VM-Exit due to #DB interception. VM-Entry has a + * consistency check that a single-step #DB is pending + * in this scenario as the previous instruction cannot + * have toggled RFLAGS.TF 0=>1 (because STI and POP/MOV + * don't modify RFLAGS), therefore the one instruction + * delay when activating single-step breakpoints must + * have already expired. Note, the CPU sets/clears BS + * as appropriate for all other VM-Exit types. + */ if (is_icebp(intr_info)) WARN_ON(!skip_emulated_instruction(vcpu)); + else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) && + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS))) + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, + vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS); kvm_queue_exception_p(vcpu, DB_VECTOR, dr6); return 1; @@ -5397,7 +5441,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) { gpa_t gpa; - if (!vmx_can_emulate_instruction(vcpu, NULL, 0)) + if (!vmx_can_emulate_instruction(vcpu, EMULTYPE_PF, NULL, 0)) return 1; /* @@ -5426,6 +5470,14 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu) return 1; } +static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + return vmx->emulation_required && !vmx->rmode.vm86_active && + vcpu->arch.exception.pending; +} + static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -5445,8 +5497,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (!kvm_emulate_instruction(vcpu, 0)) return 0; - if (vmx->emulation_required && !vmx->rmode.vm86_active && - vcpu->arch.exception.pending) { + if (vmx_emulation_required_with_pending_exception(vcpu)) { kvm_prepare_emulation_failure_exit(vcpu); return 0; } @@ -5468,6 +5519,16 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) return 1; } +static int vmx_vcpu_pre_run(struct kvm_vcpu *vcpu) +{ + if (vmx_emulation_required_with_pending_exception(vcpu)) { + kvm_prepare_emulation_failure_exit(vcpu); + return 0; + } + + return 1; +} + static void grow_ple_window(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6708,7 +6769,7 @@ static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx) { - kvm_guest_enter_irqoff(); +
guest_state_enter_irqoff(); /* L1D Flush includes CPU buffer clear to mitigate MDS */ if (static_branch_unlikely(&vmx_l1d_should_flush)) @@ -6724,7 +6785,7 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, vcpu->arch.cr2 = native_read_cr2(); - kvm_guest_exit_irqoff(); + guest_state_exit_irqoff(); } static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) @@ -6928,6 +6989,8 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu) BUILD_BUG_ON(offsetof(struct vcpu_vmx, vcpu) != 0); vmx = to_vmx(vcpu); + INIT_LIST_HEAD(&vmx->pi_wakeup_list); + err = -ENOMEM; vmx->vpid = allocate_vpid(); @@ -7549,25 +7612,6 @@ void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu) secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML); } -static int vmx_pre_block(struct kvm_vcpu *vcpu) -{ - if (pi_pre_block(vcpu)) - return 1; - - if (kvm_lapic_hv_timer_in_use(vcpu)) - kvm_lapic_switch_to_sw_timer(vcpu); - - return 0; -} - -static void vmx_post_block(struct kvm_vcpu *vcpu) -{ - if (kvm_x86_ops.set_hv_timer) - kvm_lapic_switch_to_hv_timer(vcpu); - - pi_post_block(vcpu); -} - static void vmx_setup_mce(struct kvm_vcpu *vcpu) { if (vcpu->arch.mcg_cap & MCG_LMCE_P) @@ -7710,6 +7754,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .tlb_flush_gva = vmx_flush_tlb_gva, .tlb_flush_guest = vmx_flush_tlb_guest, + .vcpu_pre_run = vmx_vcpu_pre_run, .run = vmx_vcpu_run, .handle_exit = vmx_handle_exit, .skip_emulated_instruction = vmx_skip_emulated_instruction, @@ -7738,7 +7783,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .hwapic_isr_update = vmx_hwapic_isr_update, .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt, .sync_pir_to_irr = vmx_sync_pir_to_irr, - .deliver_posted_interrupt = vmx_deliver_posted_interrupt, + .deliver_interrupt = vmx_deliver_interrupt, .dy_apicv_has_pending_interrupt = pi_has_pending_interrupt, .set_tss_addr = vmx_set_tss_addr, @@ -7768,9 +7813,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .cpu_dirty_log_size = PML_ENTITY_NUM, .update_cpu_dirty_logging = vmx_update_cpu_dirty_logging, - .pre_block = vmx_pre_block, - .post_block = vmx_post_block, - .pmu_ops = &intel_pmu_ops, .nested_ops = &vmx_nested_ops, diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index f8fc7441baea93da21762f92b93e45e45849cf81..7f2c82e7f38f86073549795c256f6451700faa58 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -317,6 +317,9 @@ struct vcpu_vmx { /* Posted interrupt descriptor */ struct pi_desc pi_desc; + /* Used if this vCPU is waiting for PI notification wakeup. */ + struct list_head pi_wakeup_list; + /* Support for a guest hypervisor (nested VMX) */ struct nested_vmx nested; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 76b4803dd3bdd0584bf01371df0f1652afdb8416..7131d735b1ef3fb888beb9144795b0f312923a05 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -90,6 +90,8 @@ u64 __read_mostly kvm_mce_cap_supported = MCG_CTL_P | MCG_SER_P; EXPORT_SYMBOL_GPL(kvm_mce_cap_supported); +#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e)) + #define emul_to_vcpu(ctxt) \ ((struct kvm_vcpu *)(ctxt)->vcpu) @@ -187,6 +189,11 @@ module_param(force_emulation_prefix, bool, S_IRUGO); int __read_mostly pi_inject_timer = -1; module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR); +/* Enable/disable PMU virtualization */ +bool __read_mostly enable_pmu = true; +EXPORT_SYMBOL_GPL(enable_pmu); +module_param(enable_pmu, bool, 0444); + /* * Restoring the host value for MSRs that are only consumed when running in * usermode, e.g. 
SYSCALL MSRs and TSC_AUX, can be deferred until the CPU @@ -3530,6 +3537,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & ~supported_xss) return 1; vcpu->arch.ia32_xss = data; + kvm_update_cpuid_runtime(vcpu); break; case MSR_SMI_COUNT: if (!msr_info->host_initiated) @@ -4224,6 +4232,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_SREGS2: case KVM_CAP_EXIT_ON_EMULATION_FAILURE: case KVM_CAP_VCPU_ATTRIBUTES: + case KVM_CAP_SYS_ATTRIBUTES: r = 1; break; case KVM_CAP_EXIT_HYPERCALL: @@ -4326,7 +4335,49 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; } return r; +} +static inline void __user *kvm_get_attr_addr(struct kvm_device_attr *attr) +{ + void __user *uaddr = (void __user*)(unsigned long)attr->addr; + + if ((u64)(unsigned long)uaddr != attr->addr) + return ERR_PTR_USR(-EFAULT); + return uaddr; +} + +static int kvm_x86_dev_get_attr(struct kvm_device_attr *attr) +{ + u64 __user *uaddr = kvm_get_attr_addr(attr); + + if (attr->group) + return -ENXIO; + + if (IS_ERR(uaddr)) + return PTR_ERR(uaddr); + + switch (attr->attr) { + case KVM_X86_XCOMP_GUEST_SUPP: + if (put_user(supported_xcr0, uaddr)) + return -EFAULT; + return 0; + default: + return -ENXIO; + break; + } +} + +static int kvm_x86_dev_has_attr(struct kvm_device_attr *attr) +{ + if (attr->group) + return -ENXIO; + + switch (attr->attr) { + case KVM_X86_XCOMP_GUEST_SUPP: + return 0; + default: + return -ENXIO; + } } long kvm_arch_dev_ioctl(struct file *filp, @@ -4417,6 +4468,22 @@ long kvm_arch_dev_ioctl(struct file *filp, case KVM_GET_SUPPORTED_HV_CPUID: r = kvm_ioctl_get_supported_hv_cpuid(NULL, argp); break; + case KVM_GET_DEVICE_ATTR: { + struct kvm_device_attr attr; + r = -EFAULT; + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + break; + r = kvm_x86_dev_get_attr(&attr); + break; + } + case KVM_HAS_DEVICE_ATTR: { + struct kvm_device_attr attr; + r = -EFAULT; + if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) + break; + r = kvm_x86_dev_has_attr(&attr); + break; + } default: r = -EINVAL; break; @@ -4855,8 +4922,10 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.apic->sipi_vector = events->sipi_vector; if (events->flags & KVM_VCPUEVENT_VALID_SMM) { - if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) + if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { + kvm_x86_ops.nested_ops->leave_nested(vcpu); kvm_smm_changed(vcpu, events->smi.smm); + } vcpu->arch.smi_pending = events->smi.pending; @@ -5017,11 +5086,11 @@ static int kvm_arch_tsc_has_attr(struct kvm_vcpu *vcpu, static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - u64 __user *uaddr = (u64 __user *)(unsigned long)attr->addr; + u64 __user *uaddr = kvm_get_attr_addr(attr); int r; - if ((u64)(unsigned long)uaddr != attr->addr) - return -EFAULT; + if (IS_ERR(uaddr)) + return PTR_ERR(uaddr); switch (attr->attr) { case KVM_VCPU_TSC_OFFSET: @@ -5040,12 +5109,12 @@ static int kvm_arch_tsc_get_attr(struct kvm_vcpu *vcpu, static int kvm_arch_tsc_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) { - u64 __user *uaddr = (u64 __user *)(unsigned long)attr->addr; + u64 __user *uaddr = kvm_get_attr_addr(attr); struct kvm *kvm = vcpu->kvm; int r; - if ((u64)(unsigned long)uaddr != attr->addr) - return -EFAULT; + if (IS_ERR(uaddr)) + return PTR_ERR(uaddr); switch (attr->attr) { case KVM_VCPU_TSC_OFFSET: { @@ -5230,17 +5299,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct 
kvm_cpuid __user *cpuid_arg = argp; struct kvm_cpuid cpuid; - /* - * KVM does not correctly handle changing guest CPUID after KVM_RUN, as - * MAXPHYADDR, GBPAGES support, AMD reserved bit behavior, etc.. aren't - * tracked in kvm_mmu_page_role. As a result, KVM may miss guest page - * faults due to reusing SPs/SPTEs. In practice no sane VMM mucks with - * the core vCPU model on the fly, so fail. - */ - r = -EINVAL; - if (vcpu->arch.last_vmentry_cpu != -1) - goto out; - r = -EFAULT; if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid))) goto out; @@ -5251,14 +5309,6 @@ long kvm_arch_vcpu_ioctl(struct file *filp, struct kvm_cpuid2 __user *cpuid_arg = argp; struct kvm_cpuid2 cpuid; - /* - * KVM_SET_CPUID{,2} after KVM_RUN is forbidded, see the comment in - * KVM_SET_CPUID case above. - */ - r = -EINVAL; - if (vcpu->arch.last_vmentry_cpu != -1) - goto out; - r = -EFAULT; if (copy_from_user(&cpuid, cpuid_arg, sizeof(cpuid))) goto out; @@ -6824,6 +6874,13 @@ int kvm_write_guest_virt_system(struct kvm_vcpu *vcpu, gva_t addr, void *val, } EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); +static int kvm_can_emulate_insn(struct kvm_vcpu *vcpu, int emul_type, + void *insn, int insn_len) +{ + return static_call(kvm_x86_can_emulate_instruction)(vcpu, emul_type, + insn, insn_len); +} + int handle_ud(struct kvm_vcpu *vcpu) { static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX }; @@ -6831,7 +6888,7 @@ int handle_ud(struct kvm_vcpu *vcpu) char sig[5]; /* ud2; .ascii "kvm" */ struct x86_exception e; - if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, NULL, 0))) + if (unlikely(!kvm_can_emulate_insn(vcpu, emul_type, NULL, 0))) return 1; if (force_emulation_prefix && @@ -8207,7 +8264,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, bool writeback = true; bool write_fault_to_spt; - if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, insn, insn_len))) + if (unlikely(!kvm_can_emulate_insn(vcpu, emulation_type, insn, insn_len))) return 1; vcpu->arch.l1tf_flush_l1d = true; @@ -9720,7 +9777,7 @@ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); } -void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) +static void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { if (!lapic_in_kernel(vcpu)) return; @@ -9945,10 +10002,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) smp_mb__after_srcu_read_unlock(); /* - * This handles the case where a posted interrupt was - * notified with kvm_vcpu_kick. Assigned devices can - * use the POSTED_INTR_VECTOR even if APICv is disabled, - * so do it even if APICv is disabled on this vCPU. + * Process pending posted interrupts to handle the case where the + * notification IRQ arrived in the host, or was never sent (because the + * target vCPU wasn't running). Do this regardless of the vCPU's APICv + * status, KVM doesn't update assigned devices when APICv is inhibited, + * i.e. they can post interrupts even if APICv is temporarily disabled. */ if (kvm_lapic_enabled(vcpu)) static_call_cond(kvm_x86_sync_pir_to_irr)(vcpu); @@ -9985,6 +10043,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(0, 7); } + guest_timing_enter_irqoff(); + for (;;) { /* * Assert that vCPU vs. VM APICv state is consistent. An APICv @@ -10069,7 +10129,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * of accounting via context tracking, but the loss of accuracy is * acceptable for all known use cases. 
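 *
 * Illustrative aside (sketch, not part of the patch): this series splits
 * the old kvm_guest_{enter,exit}_irqoff() helpers into a timing half and
 * a state half. Using the generic helper names visible elsewhere in this
 * diff, the resulting pairing is roughly:
 *
 *	guest_timing_enter_irqoff();	// arch code, before the run loop
 *	guest_state_enter_irqoff();	// vendor code, just before VM-enter
 *	// ... VMLAUNCH/VMRUN ...
 *	guest_state_exit_irqoff();	// vendor code, right after VM-exit
 *	guest_timing_exit_irqoff();	// arch code, replaces vtime_account_guest_exit()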
*/ - vtime_account_guest_exit(); + guest_timing_exit_irqoff(); if (lapic_in_kernel(vcpu)) { s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta; @@ -10113,8 +10173,20 @@ out: static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) { - if (!kvm_arch_vcpu_runnable(vcpu) && - (!kvm_x86_ops.pre_block || static_call(kvm_x86_pre_block)(vcpu) == 0)) { + bool hv_timer; + + if (!kvm_arch_vcpu_runnable(vcpu)) { + /* + * Switch to the software timer before halt-polling/blocking as + * the guest's timer may be a break event for the vCPU, and the + * hypervisor timer runs only when the CPU is in guest mode. + * Switch before halt-polling so that KVM recognizes an expired + * timer before blocking. + */ + hv_timer = kvm_lapic_hv_timer_in_use(vcpu); + if (hv_timer) + kvm_lapic_switch_to_sw_timer(vcpu); + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) kvm_vcpu_halt(vcpu); @@ -10122,8 +10194,8 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) kvm_vcpu_block(vcpu); vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); - if (kvm_x86_ops.post_block) - static_call(kvm_x86_post_block)(vcpu); + if (hv_timer) + kvm_lapic_switch_to_hv_timer(vcpu); if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) return 1; @@ -10316,6 +10388,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) r = -EINTR; goto out; } + /* + * It should be impossible for the hypervisor timer to be in + * use before KVM has ever run the vCPU. + */ + WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu)); kvm_vcpu_block(vcpu); if (kvm_apic_accept_events(vcpu) < 0) { r = 0; @@ -10360,10 +10437,16 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) } else WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); - if (kvm_run->immediate_exit) + if (kvm_run->immediate_exit) { r = -EINTR; - else - r = vcpu_run(vcpu); + goto out; + } + + r = static_call(kvm_x86_vcpu_pre_run)(vcpu); + if (r <= 0) + goto out; + + r = vcpu_run(vcpu); out: kvm_put_guest_fpu(vcpu); @@ -11199,7 +11282,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.msr_misc_features_enables = 0; - vcpu->arch.xcr0 = XFEATURE_MASK_FP; + __kvm_set_xcr(vcpu, 0, XFEATURE_MASK_FP); + __kvm_set_msr(vcpu, MSR_IA32_XSS, 0, true); } /* All GPRs except RDX (handled below) are zeroed on RESET/INIT. */ @@ -11216,8 +11300,6 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) cpuid_0x1 = kvm_find_cpuid_entry(vcpu, 1, 0); kvm_rdx_write(vcpu, cpuid_0x1 ? cpuid_0x1->eax : 0x600); - vcpu->arch.ia32_xss = 0; - static_call(kvm_x86_vcpu_reset)(vcpu, init_event); kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); @@ -11561,8 +11643,6 @@ void kvm_arch_sync_events(struct kvm *kvm) kvm_free_pit(kvm); } -#define ERR_PTR_USR(e) ((void __user *)ERR_PTR(e)) - /** * __x86_set_memory_region: Setup KVM internal memory slot * diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index bec8ed090abcef044e78ec61a7ec7a404a9beee0..767ec7f9951608f984b4ac69a1c3205ce0a93ebc 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -10,51 +10,6 @@ void kvm_spurious_fault(void); -static __always_inline void kvm_guest_enter_irqoff(void) -{ - /* - * VMENTER enables interrupts (host state), but the kernel state is - * interrupts disabled when this is invoked. Also tell RCU about - * it. This is the same logic as for exit_to_user_mode(). - * - * This ensures that e.g. latency analysis on the host observes - * guest mode as interrupt enabled. 
- * - * guest_enter_irqoff() informs context tracking about the - * transition to guest mode and if enabled adjusts RCU state - * accordingly. - */ - instrumentation_begin(); - trace_hardirqs_on_prepare(); - lockdep_hardirqs_on_prepare(CALLER_ADDR0); - instrumentation_end(); - - guest_enter_irqoff(); - lockdep_hardirqs_on(CALLER_ADDR0); -} - -static __always_inline void kvm_guest_exit_irqoff(void) -{ - /* - * VMEXIT disables interrupts (host state), but tracing and lockdep - * have them in state 'on' as recorded before entering guest mode. - * Same as enter_from_user_mode(). - * - * context_tracking_guest_exit() restores host context and reinstates - * RCU if enabled and required. - * - * This needs to be done immediately after VM-Exit, before any code - * that might contain tracepoints or call out to the greater world, - * e.g. before x86_spec_ctrl_restore_host(). - */ - lockdep_hardirqs_off(CALLER_ADDR0); - context_tracking_guest_exit(); - - instrumentation_begin(); - trace_hardirqs_off_finish(); - instrumentation_end(); -} - #define KVM_NESTED_VMENTER_CONSISTENCY_CHECK(consistency_check) \ ({ \ bool failed = (consistency_check); \ @@ -336,6 +291,7 @@ extern u64 host_xcr0; extern u64 supported_xcr0; extern u64 host_xss; extern u64 supported_xss; +extern bool enable_pmu; static inline bool kvm_mpx_supported(void) { diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 0e3f7d6e9fd775d68589139feb1f4adf8eea9277..bad57535fad0825608c91e9cdd590c9fc0040a42 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -316,10 +316,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) "\tnotq %0\n" "\t" LOCK_PREFIX "andq %0, %2\n" "2:\n" - "\t.section .fixup,\"ax\"\n" - "3:\tjmp\t2b\n" - "\t.previous\n" - _ASM_EXTABLE_UA(1b, 3b) + _ASM_EXTABLE_UA(1b, 2b) : "=r" (evtchn_pending_sel), "+m" (vi->evtchn_pending_sel), "+m" (v->arch.xen.evtchn_pending_sel) @@ -335,10 +332,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) "\tnotl %0\n" "\t" LOCK_PREFIX "andl %0, %2\n" "2:\n" - "\t.section .fixup,\"ax\"\n" - "3:\tjmp\t2b\n" - "\t.previous\n" - _ASM_EXTABLE_UA(1b, 3b) + _ASM_EXTABLE_UA(1b, 2b) : "=r" (evtchn_pending_sel32), "+m" (vi->evtchn_pending_sel), "+m" (v->arch.xen.evtchn_pending_sel) diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 2edd86649468fa4dfc43d259f47492e3103973e6..615a76d70019470b286d90fa91079836d46ea0ce 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -353,8 +353,8 @@ static void pci_fixup_video(struct pci_dev *pdev) } } } -DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); +DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); static const struct dmi_system_id msi_k8t_dmi_table[] = { diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 95d26a69088b176d53d2ff8056d13d332c704a18..40d6a06e41c81b7b9b2328d1526ad8a84676c1d8 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -8,7 +8,6 @@ endmenu config UML_X86 def_bool y - select GENERIC_FIND_FIRST_BIT config 64BIT bool "64-bit kernel" if "$(SUBARCH)" = "x86" diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 42300941ec29621e5867b1105ecce0b321d26665..517a9d8d8f94d02cb96d3ba24f2208741970675d 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -184,8 +185,7 @@ static int xen_cpu_dead_hvm(unsigned int cpu) if (xen_have_vector_callback && xen_feature(XENFEAT_hvm_safe_pvclock)) 
 		xen_teardown_timer(cpu);
-
-	return 0;
+	return 0;
 }
 
 static bool no_vector_callback __initdata;
 
@@ -242,15 +242,14 @@ static __init int xen_parse_no_vector_callback(char *arg)
 }
 early_param("xen_no_vector_callback", xen_parse_no_vector_callback);
 
-bool __init xen_hvm_need_lapic(void)
+static __init bool xen_x2apic_available(void)
 {
-	if (xen_pv_domain())
-		return false;
-	if (!xen_hvm_domain())
-		return false;
-	if (xen_feature(XENFEAT_hvm_pirqs) && xen_have_vector_callback)
-		return false;
-	return true;
+	return x2apic_supported();
+}
+
+static bool __init msi_ext_dest_id(void)
+{
+	return cpuid_eax(xen_cpuid_base() + 4) & XEN_HVM_CPUID_EXT_DEST_ID;
 }
 
 static __init void xen_hvm_guest_late_init(void)
@@ -312,9 +311,10 @@ struct hypervisor_x86 x86_hyper_xen_hvm __initdata = {
 	.detect                 = xen_platform_hvm,
 	.type                   = X86_HYPER_XEN_HVM,
 	.init.init_platform     = xen_hvm_guest_init,
-	.init.x2apic_available  = xen_x2apic_para_available,
+	.init.x2apic_available  = xen_x2apic_available,
 	.init.init_mem_mapping  = xen_hvm_init_mem_mapping,
 	.init.guest_late_init   = xen_hvm_guest_late_init,
+	.init.msi_ext_dest_id   = msi_ext_dest_id,
 	.runtime.pin_vcpu       = xen_pin_vcpu,
 	.ignore_nopv            = true,
 };
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5004feb16783d463eb7f54829ccaf7791f4e8313..d47c3d176ae4b7b8e2995a6ec6cd52ec138c3175 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1341,10 +1341,6 @@ asmlinkage __visible void __init xen_start_kernel(void)
 
 	xen_acpi_sleep_register();
 
-	/* Avoid searching for BIOS MP tables */
-	x86_init.mpparse.find_smp_config = x86_init_noop;
-	x86_init.mpparse.get_smp_config = x86_init_uint_noop;
-
 	xen_boot_params_init_edd();
 
 #ifdef CONFIG_ACPI
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 6a8f3b53ab8343d98bea81433d80de7399a63038..4a6019238ee7dfded463a1325ec6acb5d3dbb576 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -148,28 +148,12 @@ int xen_smp_intr_init_pv(unsigned int cpu)
 	return rc;
 }
 
-static void __init xen_fill_possible_map(void)
-{
-	int i, rc;
-
-	if (xen_initial_domain())
-		return;
-
-	for (i = 0; i < nr_cpu_ids; i++) {
-		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
-		if (rc >= 0) {
-			num_processors++;
-			set_cpu_possible(i, true);
-		}
-	}
-}
-
-static void __init xen_filter_cpu_maps(void)
+static void __init _get_smp_config(unsigned int early)
 {
 	int i, rc;
 	unsigned int subtract = 0;
 
-	if (!xen_initial_domain())
+	if (early)
 		return;
 
 	num_processors = 0;
@@ -210,7 +194,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void)
 	 * sure the old memory can be recycled.
*/ make_lowmem_page_readwrite(xen_initial_gdt); - xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); /* @@ -476,5 +459,8 @@ static const struct smp_ops xen_smp_ops __initconst = { void __init xen_smp_init(void) { smp_ops = xen_smp_ops; - xen_fill_possible_map(); + + /* Avoid searching for BIOS MP tables */ + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.get_smp_config = _get_smp_config; } diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c index 31b1e3477cb633f3804d4a653edca273019810e4..14ea32e734d59315c5aa70e1ba484be3468a5550 100644 --- a/arch/x86/xen/vga.c +++ b/arch/x86/xen/vga.c @@ -57,6 +57,14 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size; screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos; + if (size >= offsetof(struct dom0_vga_console_info, + u.vesa_lfb.ext_lfb_base) + + sizeof(info->u.vesa_lfb.ext_lfb_base) + && info->u.vesa_lfb.ext_lfb_base) { + screen_info->ext_lfb_base = info->u.vesa_lfb.ext_lfb_base; + screen_info->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; + } + if (info->video_type == XEN_VGATYPE_EFI_LFB) { screen_info->orig_video_isVGA = VIDEO_TYPE_EFI; break; @@ -66,14 +74,6 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size) u.vesa_lfb.mode_attrs) + sizeof(info->u.vesa_lfb.mode_attrs)) screen_info->vesa_attributes = info->u.vesa_lfb.mode_attrs; - - if (size >= offsetof(struct dom0_vga_console_info, - u.vesa_lfb.ext_lfb_base) - + sizeof(info->u.vesa_lfb.ext_lfb_base) - && info->u.vesa_lfb.ext_lfb_base) { - screen_info->ext_lfb_base = info->u.vesa_lfb.ext_lfb_base; - screen_info->capabilities |= VIDEO_CAPABILITY_64BIT_BASE; - } break; } } diff --git a/arch/xtensa/include/asm/bitops.h b/arch/xtensa/include/asm/bitops.h index 3f71d364ba909f5d40df9c289dd1134154a155cb..cd225896c40f4bfa5e675ee439d27a95b0230a22 100644 --- a/arch/xtensa/include/asm/bitops.h +++ b/arch/xtensa/include/asm/bitops.h @@ -205,7 +205,6 @@ BIT_OPS(change, "xor", ) #undef BIT_OP #undef TEST_AND_BIT_OP -#include #include #include diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index 07b642c1916a87b0c1ac869c165235f24703ceff..8eb6ad1a3a1dec402dd852aa60224f250d4da440 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -208,7 +208,7 @@ static int simdisk_detach(struct simdisk *dev) static ssize_t proc_read_simdisk(struct file *file, char __user *buf, size_t size, loff_t *ppos) { - struct simdisk *dev = PDE_DATA(file_inode(file)); + struct simdisk *dev = pde_data(file_inode(file)); const char *s = dev->filename; if (s) { ssize_t n = simple_read_from_buffer(buf, size, ppos, @@ -225,7 +225,7 @@ static ssize_t proc_write_simdisk(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { char *tmp = memdup_user_nul(buf, count); - struct simdisk *dev = PDE_DATA(file_inode(file)); + struct simdisk *dev = pde_data(file_inode(file)); int err; if (IS_ERR(tmp)) diff --git a/block/bdev.c b/block/bdev.c index 8bf93a19041b7beb925069ae33b86891c4ade7f9..102837a370517c78d9d17d7793cfdf6d2ce6adc6 100644 --- a/block/bdev.c +++ b/block/bdev.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include "../fs/internal.h" @@ -88,10 +87,6 @@ void invalidate_bdev(struct block_device *bdev) lru_add_drain_all(); /* make sure all lru add caches are flushed */ invalidate_mapping_pages(mapping, 0, -1); } - /* 99% of the time, we don't need to flush the cleancache on the bdev. 
- * But, for the strange corners, lets be cautious - */ - cleancache_invalidate_inode(mapping); } EXPORT_SYMBOL(invalidate_bdev); diff --git a/block/bio-integrity.c b/block/bio-integrity.c index d2511471545928d602f16bacd6713faddb705a32..0827b19820c528e7b58c850e3543c2fb4bd61462 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -373,7 +373,7 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk); unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); - bip->bip_iter.bi_sector += bytes_done >> 9; + bip->bip_iter.bi_sector += bio_integrity_intervals(bi, bytes_done >> 9); bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes); } diff --git a/block/blk-core.c b/block/blk-core.c index 97f8bc8d3a7916c4b3fde840ea6aa55eaf3f82f5..d93e3bb9a769b46a29f6c1b4abbc15441341dd01 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1061,20 +1061,32 @@ again: } static unsigned long __part_start_io_acct(struct block_device *part, - unsigned int sectors, unsigned int op) + unsigned int sectors, unsigned int op, + unsigned long start_time) { const int sgrp = op_stat_group(op); - unsigned long now = READ_ONCE(jiffies); part_stat_lock(); - update_io_ticks(part, now, false); + update_io_ticks(part, start_time, false); part_stat_inc(part, ios[sgrp]); part_stat_add(part, sectors[sgrp], sectors); part_stat_local_inc(part, in_flight[op_is_write(op)]); part_stat_unlock(); - return now; + return start_time; +} + +/** + * bio_start_io_acct_time - start I/O accounting for bio based drivers + * @bio: bio to start account for + * @start_time: start time that should be passed back to bio_end_io_acct(). + */ +void bio_start_io_acct_time(struct bio *bio, unsigned long start_time) +{ + __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), + bio_op(bio), start_time); } +EXPORT_SYMBOL_GPL(bio_start_io_acct_time); /** * bio_start_io_acct - start I/O accounting for bio based drivers @@ -1084,14 +1096,15 @@ static unsigned long __part_start_io_acct(struct block_device *part, */ unsigned long bio_start_io_acct(struct bio *bio) { - return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), bio_op(bio)); + return __part_start_io_acct(bio->bi_bdev, bio_sectors(bio), + bio_op(bio), jiffies); } EXPORT_SYMBOL_GPL(bio_start_io_acct); unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, unsigned int op) { - return __part_start_io_acct(disk->part0, sectors, op); + return __part_start_io_acct(disk->part0, sectors, op, jiffies); } EXPORT_SYMBOL(disk_start_io_acct); diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c index b925f3db3ab7a40f1f83808ada281603a56b765a..18c68d8b9138e89921074b9038c300cd04312ec9 100644 --- a/block/blk-ia-ranges.c +++ b/block/blk-ia-ranges.c @@ -144,7 +144,7 @@ int disk_register_independent_access_ranges(struct gendisk *disk, &q->kobj, "%s", "independent_access_ranges"); if (ret) { q->ia_ranges = NULL; - kfree(iars); + kobject_put(&iars->kobj); return ret; } diff --git a/block/blk-mq.c b/block/blk-mq.c index b5e35e63adad47f5da0f04b292c579c3b9824989..1adfe4824ef5e29377874b1b337326ab3b3c498b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2922,6 +2922,8 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * */ blk_mq_run_dispatch_ops(rq->q, ret = blk_mq_request_issue_directly(rq, true)); + if (ret) + blk_account_io_done(rq, ktime_get_ns()); return ret; } EXPORT_SYMBOL_GPL(blk_insert_cloned_request); @@ -3285,7 +3287,7 @@ static bool 
blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx) static inline bool blk_mq_last_cpu_in_hctx(unsigned int cpu, struct blk_mq_hw_ctx *hctx) { - if (cpumask_next_and(-1, hctx->cpumask, cpu_online_mask) != cpu) + if (cpumask_first_and(hctx->cpumask, cpu_online_mask) != cpu) return false; if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids) return false; diff --git a/block/fops.c b/block/fops.c index 26bf15c770d21a1c8fc062337691db037d59972d..4f59e0f5bf30966214064260c9a4119773b2bff7 100644 --- a/block/fops.c +++ b/block/fops.c @@ -566,34 +566,37 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct block_device *bdev = iocb->ki_filp->private_data; loff_t size = bdev_nr_bytes(bdev); - size_t count = iov_iter_count(to); loff_t pos = iocb->ki_pos; size_t shorted = 0; ssize_t ret = 0; + size_t count; - if (unlikely(pos + count > size)) { + if (unlikely(pos + iov_iter_count(to) > size)) { if (pos >= size) return 0; size -= pos; - if (count > size) { - shorted = count - size; - iov_iter_truncate(to, size); - } + shorted = iov_iter_count(to) - size; + iov_iter_truncate(to, size); } + count = iov_iter_count(to); + if (!count) + goto reexpand; /* skip atime */ + if (iocb->ki_flags & IOCB_DIRECT) { struct address_space *mapping = iocb->ki_filp->f_mapping; if (iocb->ki_flags & IOCB_NOWAIT) { - if (filemap_range_needs_writeback(mapping, iocb->ki_pos, - iocb->ki_pos + count - 1)) - return -EAGAIN; + if (filemap_range_needs_writeback(mapping, pos, + pos + count - 1)) { + ret = -EAGAIN; + goto reexpand; + } } else { - ret = filemap_write_and_wait_range(mapping, - iocb->ki_pos, - iocb->ki_pos + count - 1); + ret = filemap_write_and_wait_range(mapping, pos, + pos + count - 1); if (ret < 0) - return ret; + goto reexpand; } file_accessed(iocb->ki_filp); @@ -603,12 +606,14 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) iocb->ki_pos += ret; count -= ret; } + iov_iter_revert(to, count - iov_iter_count(to)); if (ret < 0 || !count) - return ret; + goto reexpand; } ret = filemap_read(iocb, to, ret); +reexpand: if (unlikely(shorted)) iov_iter_reexpand(to, iov_iter_count(to) + shorted); return ret; diff --git a/certs/Makefile b/certs/Makefile index f7041c29a2e0eef0ff6006a5a7db2551b87529a4..3ea7fe60823f5b1c85a1db07bf3cd80c3ffb463e 100644 --- a/certs/Makefile +++ b/certs/Makefile @@ -68,14 +68,14 @@ $(obj)/x509.genkey: endif # CONFIG_MODULE_SIG_KEY # If CONFIG_MODULE_SIG_KEY isn't a PKCS#11 URI, depend on it -ifneq ($(filter-out pkcs11:%, %(CONFIG_MODULE_SIG_KEY)),) +ifneq ($(filter-out pkcs11:%, $(CONFIG_MODULE_SIG_KEY)),) X509_DEP := $(CONFIG_MODULE_SIG_KEY) endif $(obj)/system_certificates.o: $(obj)/signing_key.x509 $(obj)/signing_key.x509: $(X509_DEP) $(obj)/extract-cert FORCE - $(call if_changed,extract_certs,$(if $(X509_DEP),$<,$(CONFIG_MODULE_SIG_KEY))) + $(call if_changed,extract_certs,$(if $(CONFIG_MODULE_SIG_KEY),$(if $(X509_DEP),$<,$(CONFIG_MODULE_SIG_KEY)),"")) endif # CONFIG_MODULE_SIG targets += signing_key.x509 diff --git a/crypto/algapi.c b/crypto/algapi.c index a366cb3e8aa1840760f86c5767a906f9a88f6387..76fdaa16bd4a00bcfae7f871aab23e3eb4834b51 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -1324,3 +1324,4 @@ module_exit(crypto_algapi_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Cryptographic algorithms API"); +MODULE_SOFTDEP("pre: cryptomgr"); diff --git a/crypto/api.c b/crypto/api.c index cf0869dd130b381fec0ddeceef77e7db14bab659..7ddfe946dd56b5e74fe721126ed5f7a9b1b50b00 100644 --- a/crypto/api.c +++ b/crypto/api.c 
@@ -643,4 +643,3 @@ EXPORT_SYMBOL_GPL(crypto_req_done);
 
 MODULE_DESCRIPTION("Cryptographic core API");
 MODULE_LICENSE("GPL");
-MODULE_SOFTDEP("pre: cryptomgr");
diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c
index 72fe480f9bd67de37747034a714379e884047e1c..5f96a21f8788311d954bed2b3fd1c969a97f7eeb 100644
--- a/crypto/blake2s_generic.c
+++ b/crypto/blake2s_generic.c
@@ -15,12 +15,12 @@
 static int crypto_blake2s_update_generic(struct shash_desc *desc,
 					 const u8 *in, unsigned int inlen)
 {
-	return crypto_blake2s_update(desc, in, inlen, blake2s_compress_generic);
+	return crypto_blake2s_update(desc, in, inlen, true);
 }
 
 static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out)
 {
-	return crypto_blake2s_final(desc, out, blake2s_compress_generic);
+	return crypto_blake2s_final(desc, out, true);
 }
 
 #define BLAKE2S_ALG(name, driver_name, digest_size) \
diff --git a/drivers/accessibility/speakup/speakup_dectlk.c b/drivers/accessibility/speakup/speakup_dectlk.c
index 580ec796816bcf0eefdc66e23329ee10f173f19f..78ca4987e619e287001893eb09e22f90b39f7cb9 100644
--- a/drivers/accessibility/speakup/speakup_dectlk.c
+++ b/drivers/accessibility/speakup/speakup_dectlk.c
@@ -44,6 +44,7 @@ static struct var_t vars[] = {
 	{ CAPS_START, .u.s = {"[:dv ap 160] " } },
 	{ CAPS_STOP, .u.s = {"[:dv ap 100 ] " } },
 	{ RATE, .u.n = {"[:ra %d] ", 180, 75, 650, 0, 0, NULL } },
+	{ PITCH, .u.n = {"[:dv ap %d] ", 122, 50, 350, 0, 0, NULL } },
 	{ INFLECTION, .u.n = {"[:dv pr %d] ", 100, 0, 10000, 0, 0, NULL } },
 	{ VOL, .u.n = {"[:dv g5 %d] ", 86, 60, 86, 0, 0, NULL } },
 	{ PUNCT, .u.n = {"[:pu %c] ", 0, 0, 2, 0, 0, "nsa" } },
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index ba45541b1f1f858fdf47335da23fa168784a8f2c..273741dedfd20e615ae6cabecee8c0938505016e 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -11,6 +11,7 @@ menuconfig ACPI
 	depends on ARCH_SUPPORTS_ACPI
 	select PNP
 	select NLS
+	select CRC32
 	default y if X86
 	help
 	  Advanced Configuration and Power Interface (ACPI) support for
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 3b23fb775ac456cc743adba2230dff2bdb028a1c..f2f8f05662deb94fdf3a3f096886b049149a4958 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -1361,9 +1361,17 @@ static void __init arm_smmu_v3_pmcg_init_resources(struct resource *res,
 	res[0].start = pmcg->page0_base_address;
 	res[0].end = pmcg->page0_base_address + SZ_4K - 1;
 	res[0].flags = IORESOURCE_MEM;
-	res[1].start = pmcg->page1_base_address;
-	res[1].end = pmcg->page1_base_address + SZ_4K - 1;
-	res[1].flags = IORESOURCE_MEM;
+	/*
+	 * The initial version in DEN0049C lacked a way to describe register
+	 * page 1, which makes it broken for most PMCG implementations; in
+	 * that case, just let the driver fail gracefully if it expects to
+	 * find a second memory resource.
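+	 *
+	 * Illustrative aside (hypothetical consumer, not part of the
+	 * patch): with res[1] left empty, a PMU driver probing a
+	 * revision-0 PMCG can detect the missing page cleanly, e.g.:
+	 *
+	 *	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	 *	if (!res)
+	 *		return -ENODEV;	// page 1 not described by rev-0 IORT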
+ */ + if (node->revision > 0) { + res[1].start = pmcg->page1_base_address; + res[1].end = pmcg->page1_base_address + SZ_4K - 1; + res[1].flags = IORESOURCE_MEM; + } if (pmcg->overflow_gsiv) acpi_iort_register_irq(pmcg->overflow_gsiv, "overflow", diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index a9d2de403c0c4233f80593f05823eea886ee1a8e..866560cbb082cd7b14caf384e3a5fba465dfa778 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -915,30 +915,31 @@ int __weak cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) { - int ret_val = 0; void __iomem *vaddr = NULL; int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu); struct cpc_reg *reg = ®_res->cpc_entry.reg; if (reg_res->type == ACPI_TYPE_INTEGER) { *val = reg_res->cpc_entry.int_value; - return ret_val; + return 0; } *val = 0; if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_IO) { u32 width = 8 << (reg->access_width - 1); + u32 val_u32; acpi_status status; status = acpi_os_read_port((acpi_io_address)reg->address, - (u32 *)val, width); + &val_u32, width); if (ACPI_FAILURE(status)) { pr_debug("Error: Failed to read SystemIO port %llx\n", reg->address); return -EFAULT; } + *val = val_u32; return 0; } else if (reg->space_id == ACPI_ADR_SPACE_PLATFORM_COMM && pcc_ss_id >= 0) vaddr = GET_PCC_VADDR(reg->address, pcc_ss_id); @@ -966,10 +967,10 @@ static int cpc_read(int cpu, struct cpc_register_resource *reg_res, u64 *val) default: pr_debug("Error: Cannot read %u bit width from PCC for ss: %d\n", reg->bit_width, pcc_ss_id); - ret_val = -EFAULT; + return -EFAULT; } - return ret_val; + return 0; } static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val) diff --git a/drivers/acpi/dptf/dptf_pch_fivr.c b/drivers/acpi/dptf/dptf_pch_fivr.c index e7ab0fc90db92a5e845e054e10f56bb7c44b936c..c0da24c9f8c36bb526ad3958fc6438198a228b31 100644 --- a/drivers/acpi/dptf/dptf_pch_fivr.c +++ b/drivers/acpi/dptf/dptf_pch_fivr.c @@ -151,6 +151,7 @@ static int pch_fivr_remove(struct platform_device *pdev) static const struct acpi_device_id pch_fivr_device_ids[] = { {"INTC1045", 0}, {"INTC1049", 0}, + {"INTC10A3", 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, pch_fivr_device_ids); diff --git a/drivers/acpi/dptf/dptf_power.c b/drivers/acpi/dptf/dptf_power.c index a24d5d7aa1178471ef387b15ee4827ebfa4feacc..dc1f52a5b3f48a67eac780336da1e20b448995f4 100644 --- a/drivers/acpi/dptf/dptf_power.c +++ b/drivers/acpi/dptf/dptf_power.c @@ -231,6 +231,8 @@ static const struct acpi_device_id int3407_device_ids[] = { {"INTC1050", 0}, {"INTC1060", 0}, {"INTC1061", 0}, + {"INTC10A4", 0}, + {"INTC10A5", 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, int3407_device_ids); diff --git a/drivers/acpi/dptf/int340x_thermal.c b/drivers/acpi/dptf/int340x_thermal.c index da5d5f0be2f27bf45661d5b1ca8002f3472197fd..42a5563465484aedf742c75464b2406c62bcf319 100644 --- a/drivers/acpi/dptf/int340x_thermal.c +++ b/drivers/acpi/dptf/int340x_thermal.c @@ -37,6 +37,12 @@ static const struct acpi_device_id int340x_thermal_device_ids[] = { {"INTC1050"}, {"INTC1060"}, {"INTC1061"}, + {"INTC10A0"}, + {"INTC10A1"}, + {"INTC10A2"}, + {"INTC10A3"}, + {"INTC10A4"}, + {"INTC10A5"}, {""}, }; diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 0077d2c85df870fd883a88279eec56ef8404a90a..46710380a40292ac4b1b24f3d58d47105099a563 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -2065,6 +2065,16 @@ bool acpi_ec_dispatch_gpe(void) if (acpi_any_gpe_status_set(first_ec->gpe)) return true; + /* + * 
Cancel the SCI wakeup and process all pending events in case there + * are any wakeup ones in there. + * + * Note that if any non-EC GPEs are active at this point, the SCI will + * retrigger after the rearming in acpi_s2idle_wake(), so no events + * should be missed by canceling the wakeup here. + */ + pm_system_cancel_wakeup(); + /* * Dispatch the EC GPE in-band, but do not report wakeup in any case * to allow the caller to process events properly after that. diff --git a/drivers/acpi/fan.h b/drivers/acpi/fan.h index dc9a6efa514b0378a5c72d96f1d55e44746ec441..dd9bb8ca224426bd7115a15d6acb7610071fb005 100644 --- a/drivers/acpi/fan.h +++ b/drivers/acpi/fan.h @@ -10,4 +10,5 @@ {"INT3404", }, /* Fan */ \ {"INTC1044", }, /* Fan for Tiger Lake generation */ \ {"INTC1048", }, /* Fan for Alder Lake generation */ \ + {"INTC10A2", }, /* Fan for Raptor Lake generation */ \ {"PNP0C0B", } /* Generic ACPI fan */ diff --git a/drivers/acpi/proc.c b/drivers/acpi/proc.c index 0cca7991f18643c98ed2cf1f9328b98b2e9b76fb..4322f2da6d10d1efbdb690cfe1dfd66cabca0dea 100644 --- a/drivers/acpi/proc.c +++ b/drivers/acpi/proc.c @@ -127,7 +127,7 @@ static int acpi_system_wakeup_device_open_fs(struct inode *inode, struct file *file) { return single_open(file, acpi_system_wakeup_device_seq_show, - PDE_DATA(inode)); + pde_data(inode)); } static const struct proc_ops acpi_system_wakeup_device_proc_ops = { diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index a60ff5dfed3a4853d6087b2d66ade98ce203d0e0..d4fbea91ab6b824efdec881db0cca29cbf99a643 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -736,21 +736,15 @@ bool acpi_s2idle_wake(void) return true; } - /* Check non-EC GPE wakeups and dispatch the EC GPE. */ + /* + * Check non-EC GPE wakeups and if there are none, cancel the + * SCI-related wakeup and dispatch the EC GPE. + */ if (acpi_ec_dispatch_gpe()) { pm_pr_dbg("ACPI non-EC GPE wakeup\n"); return true; } - /* - * Cancel the SCI wakeup and process all pending events in case - * there are any wakeup ones in there. - * - * Note that if any non-EC GPEs are active at this point, the - * SCI will retrigger after the rearming below, so no events - * should be missed by canceling the wakeup here. - */ - pm_system_cancel_wakeup(); acpi_os_wait_events_complete(); /* @@ -764,6 +758,7 @@ bool acpi_s2idle_wake(void) return true; } + pm_wakeup_clear(acpi_sci_irq); rearm_wake_irq(acpi_sci_irq); } diff --git a/drivers/acpi/x86/s2idle.c b/drivers/acpi/x86/s2idle.c index abc06e7f89d834eca0789c934ffe1bd915759bd7..ed889f827f539f505428f1df72741e593648057a 100644 --- a/drivers/acpi/x86/s2idle.c +++ b/drivers/acpi/x86/s2idle.c @@ -424,15 +424,11 @@ static int lps0_device_attach(struct acpi_device *adev, mem_sleep_current = PM_SUSPEND_TO_IDLE; /* - * Some Intel based LPS0 systems, like ASUS Zenbook UX430UNR/i7-8550U don't - * use intel-hid or intel-vbtn but require the EC GPE to be enabled while - * suspended for certain wakeup devices to work, so mark it as wakeup-capable. - * - * Only enable on !AMD as enabling this universally causes problems for a number - * of AMD based systems. + * Some LPS0 systems, like ASUS Zenbook UX430UNR/i7-8550U, require the + * EC GPE to be enabled while suspended for certain wakeup devices to + * work, so mark it as wakeup-capable. 
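+ *
+ * Illustrative aside (sketch under stated assumptions, not part of the
+ * patch): "marking" the GPE only asks ACPICA to leave the EC GPE
+ * enabled when everything but wakeup GPEs is disabled for s2idle,
+ * roughly:
+ *
+ *	acpi_mark_gpe_for_wake(NULL, first_ec->gpe);
+ *	...
+ *	acpi_enable_all_wakeup_gpes();	// EC GPE stays armed while suspended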
 	 */
-	if (!acpi_s2idle_vendor_amd())
-		acpi_ec_mark_gpe_for_wake();
+	acpi_ec_mark_gpe_for_wake();
 
 	return 0;
 }
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 67f88027680acefe4e2eed3c1b598773625d644a..0c854aebfe0bdcc39e49d401ce91b40e4c34b8bb 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -2007,6 +2007,9 @@ static bool ata_log_supported(struct ata_device *dev, u8 log)
 {
 	struct ata_port *ap = dev->link->ap;
 
+	if (dev->horkage & ATA_HORKAGE_NO_LOG_DIR)
+		return false;
+
 	if (ata_read_log_page(dev, ATA_LOG_DIRECTORY, 0, ap->sector_buf, 1))
 		return false;
 	return get_unaligned_le16(&ap->sector_buf[log * 2]) ? true : false;
@@ -2445,23 +2448,21 @@ static void ata_dev_config_cpr(struct ata_device *dev)
 	struct ata_cpr_log *cpr_log = NULL;
 	u8 *desc, *buf = NULL;
 
-	if (!ata_identify_page_supported(dev,
-				ATA_LOG_CONCURRENT_POSITIONING_RANGES))
+	if (ata_id_major_version(dev->id) < 11 ||
+	    !ata_log_supported(dev, ATA_LOG_CONCURRENT_POSITIONING_RANGES))
 		goto out;
 
 	/*
-	 * Read IDENTIFY DEVICE data log, page 0x47
-	 * (concurrent positioning ranges). We can have at most 255 32B range
-	 * descriptors plus a 64B header.
+	 * Read the concurrent positioning ranges log (0x47). We can have at
+	 * most 255 32B range descriptors plus a 64B header.
 	 */
 	buf_len = (64 + 255 * 32 + 511) & ~511;
 	buf = kzalloc(buf_len, GFP_KERNEL);
 	if (!buf)
 		goto out;
 
-	err_mask = ata_read_log_page(dev, ATA_LOG_IDENTIFY_DEVICE,
-				     ATA_LOG_CONCURRENT_POSITIONING_RANGES,
-				     buf, buf_len >> 9);
+	err_mask = ata_read_log_page(dev, ATA_LOG_CONCURRENT_POSITIONING_RANGES,
+				     0, buf, buf_len >> 9);
 	if (err_mask)
 		goto out;
 
@@ -4028,6 +4029,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 
 	/* devices that don't properly handle TRIM commands */
 	{ "SuperSSpeed S238*",		NULL,	ATA_HORKAGE_NOTRIM, },
+	{ "M88V29*",			NULL,	ATA_HORKAGE_NOTRIM, },
 
 	/*
 	 * As defined, the DRAT (Deterministic Read After Trim) and RZAT
@@ -4073,6 +4075,13 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	{ "WDC WD3000JD-*",		NULL,	ATA_HORKAGE_WD_BROKEN_LPM },
 	{ "WDC WD3200JD-*",		NULL,	ATA_HORKAGE_WD_BROKEN_LPM },
 
+	/*
+	 * This sata dom device goes on a walkabout when the ATA_LOG_DIRECTORY
+	 * log page is accessed. Ensure we never ask for this log page with
+	 * these devices.
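+	 *
+	 * Illustrative aside (not part of the patch): the new horkage bit
+	 * is consumed at the top of ata_log_supported() above, so affected
+	 * drives are never sent the READ LOG for the directory page:
+	 *
+	 *	if (dev->horkage & ATA_HORKAGE_NO_LOG_DIR)
+	 *		return false;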
+ */ + { "SATADOM-ML 3ME", NULL, ATA_HORKAGE_NO_LOG_DIR }, + /* End Marker */ { } }; diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c index 0912846bc1b0a281165345c3eaf9b46b7211c6a0..05c2ab3757568c62a93377fe9a5ceed19f49e3f1 100644 --- a/drivers/ata/pata_octeon_cf.c +++ b/drivers/ata/pata_octeon_cf.c @@ -595,7 +595,7 @@ static unsigned int octeon_cf_dma_finished(struct ata_port *ap, union cvmx_mio_boot_dma_intx dma_int; u8 status; - trace_ata_bmdma_stop(qc, &qc->tf, qc->tag); + trace_ata_bmdma_stop(ap, &qc->tf, qc->tag); if (ap->hsm_task_state != HSM_ST_LAST) return 0; diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c index 028329428b75d48379f44405ddb99192804ceb13..87c7c90676ca019ab8efcc9b3aa61f35d69ff781 100644 --- a/drivers/ata/pata_platform.c +++ b/drivers/ata/pata_platform.c @@ -128,6 +128,8 @@ int __pata_platform_probe(struct device *dev, struct resource *io_res, ap = host->ports[0]; ap->ops = devm_kzalloc(dev, sizeof(*ap->ops), GFP_KERNEL); + if (!ap->ops) + return -ENOMEM; ap->ops->inherits = &ata_sff_port_ops; ap->ops->cable_detect = ata_cable_unknown; ap->ops->set_mode = pata_platform_set_mode; diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index da0152116d9f779dd1447ab0452247d7540b407b..556034a15430461f243e50ac5b4bec19e9e2cd69 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -322,7 +322,7 @@ static void fsl_sata_set_irq_coalescing(struct ata_host *host, static ssize_t fsl_sata_intr_coalescing_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sysfs_emit(buf, "%d %d\n", + return sysfs_emit(buf, "%u %u\n", intr_coalescing_count, intr_coalescing_ticks); } @@ -332,10 +332,8 @@ static ssize_t fsl_sata_intr_coalescing_store(struct device *dev, { unsigned int coalescing_count, coalescing_ticks; - if (sscanf(buf, "%d%d", - &coalescing_count, - &coalescing_ticks) != 2) { - printk(KERN_ERR "fsl-sata: wrong parameter format.\n"); + if (sscanf(buf, "%u%u", &coalescing_count, &coalescing_ticks) != 2) { + dev_err(dev, "fsl-sata: wrong parameter format.\n"); return -EINVAL; } @@ -359,7 +357,7 @@ static ssize_t fsl_sata_rx_watermark_show(struct device *dev, rx_watermark &= 0x1f; spin_unlock_irqrestore(&host->lock, flags); - return sysfs_emit(buf, "%d\n", rx_watermark); + return sysfs_emit(buf, "%u\n", rx_watermark); } static ssize_t fsl_sata_rx_watermark_store(struct device *dev, @@ -373,8 +371,8 @@ static ssize_t fsl_sata_rx_watermark_store(struct device *dev, void __iomem *csr_base = host_priv->csr_base; u32 temp; - if (sscanf(buf, "%d", &rx_watermark) != 1) { - printk(KERN_ERR "fsl-sata: wrong parameter format.\n"); + if (kstrtouint(buf, 10, &rx_watermark) < 0) { + dev_err(dev, "fsl-sata: wrong parameter format.\n"); return -EINVAL; } @@ -382,8 +380,8 @@ static ssize_t fsl_sata_rx_watermark_store(struct device *dev, temp = ioread32(csr_base + TRANSCFG); temp &= 0xffffffe0; iowrite32(temp | rx_watermark, csr_base + TRANSCFG); - spin_unlock_irqrestore(&host->lock, flags); + return strlen(buf); } diff --git a/drivers/base/firmware_loader/fallback.c b/drivers/base/firmware_loader/fallback.c index d7d63c1aa993f57297b953c6b7eca88147006fff..4afb0e9312c094f9b0e8dfdc8fb8d415e2d4fbd3 100644 --- a/drivers/base/firmware_loader/fallback.c +++ b/drivers/base/firmware_loader/fallback.c @@ -199,11 +199,16 @@ static struct class firmware_class = { int register_sysfs_loader(void) { - return class_register(&firmware_class); + int ret = class_register(&firmware_class); + + if (ret != 0) + return ret; + return 
register_firmware_config_sysctl(); } void unregister_sysfs_loader(void) { + unregister_firmware_config_sysctl(); class_unregister(&firmware_class); } diff --git a/drivers/base/firmware_loader/fallback.h b/drivers/base/firmware_loader/fallback.h index 3af7205b302fe351a0eb43883da16ff0a9865ac6..9f3055d3b4ca846a9c7a62b39c33bb3d14c8b678 100644 --- a/drivers/base/firmware_loader/fallback.h +++ b/drivers/base/firmware_loader/fallback.h @@ -42,6 +42,17 @@ void fw_fallback_set_default_timeout(void); int register_sysfs_loader(void); void unregister_sysfs_loader(void); +#ifdef CONFIG_SYSCTL +extern int register_firmware_config_sysctl(void); +extern void unregister_firmware_config_sysctl(void); +#else +static inline int register_firmware_config_sysctl(void) +{ + return 0; +} +static inline void unregister_firmware_config_sysctl(void) { } +#endif /* CONFIG_SYSCTL */ + #else /* CONFIG_FW_LOADER_USER_HELPER */ static inline int firmware_fallback_sysfs(struct firmware *fw, const char *name, struct device *device, diff --git a/drivers/base/firmware_loader/fallback_table.c b/drivers/base/firmware_loader/fallback_table.c index 46a731dede6f7a08b6e7c0f6d2a6cad0664a7d33..e5ac098d0742a0bdd37a3426ea6f250b7a47b40a 100644 --- a/drivers/base/firmware_loader/fallback_table.c +++ b/drivers/base/firmware_loader/fallback_table.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -24,7 +25,7 @@ struct firmware_fallback_config fw_fallback_config = { EXPORT_SYMBOL_NS_GPL(fw_fallback_config, FIRMWARE_LOADER_PRIVATE); #ifdef CONFIG_SYSCTL -struct ctl_table firmware_config_table[] = { +static struct ctl_table firmware_config_table[] = { { .procname = "force_sysfs_fallback", .data = &fw_fallback_config.force_sysfs_fallback, @@ -45,4 +46,24 @@ struct ctl_table firmware_config_table[] = { }, { } }; -#endif + +static struct ctl_table_header *firmware_config_sysct_table_header; +int register_firmware_config_sysctl(void) +{ + firmware_config_sysct_table_header = + register_sysctl("kernel/firmware_config", + firmware_config_table); + if (!firmware_config_sysct_table_header) + return -ENOMEM; + return 0; +} +EXPORT_SYMBOL_NS_GPL(register_firmware_config_sysctl, FIRMWARE_LOADER_PRIVATE); + +void unregister_firmware_config_sysctl(void) +{ + unregister_sysctl_table(firmware_config_sysct_table_header); + firmware_config_sysct_table_header = NULL; +} +EXPORT_SYMBOL_NS_GPL(unregister_firmware_config_sysctl, FIRMWARE_LOADER_PRIVATE); + +#endif /* CONFIG_SYSCTL */ diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 99bda0da23a8264354a79631fb0590c75bb12e9f..8666590201c9a0f43430d9ad055f07bae25c0c6e 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -34,7 +34,8 @@ suspend_state_t pm_suspend_target_state; bool events_check_enabled __read_mostly; /* First wakeup IRQ seen by the kernel in the last cycle. */ -unsigned int pm_wakeup_irq __read_mostly; +static unsigned int wakeup_irq[2] __read_mostly; +static DEFINE_RAW_SPINLOCK(wakeup_irq_lock); /* If greater than 0 and the system is suspending, terminate the suspend. 
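 *
 * Illustrative aside (not part of the patch): keeping two slots lets
 * the ACPI SCI be consumed on resume without discarding a second,
 * distinct wakeup IRQ from the same sleep cycle; the s2idle change in
 * this series does roughly:
 *
 *	pm_wakeup_clear(acpi_sci_irq);	// drop the SCI, keep any other IRQ
 *	rearm_wake_irq(acpi_sci_irq);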
*/ static atomic_t pm_abort_suspend __read_mostly; @@ -942,19 +943,45 @@ void pm_system_cancel_wakeup(void) atomic_dec_if_positive(&pm_abort_suspend); } -void pm_wakeup_clear(bool reset) +void pm_wakeup_clear(unsigned int irq_number) { - pm_wakeup_irq = 0; - if (reset) + raw_spin_lock_irq(&wakeup_irq_lock); + + if (irq_number && wakeup_irq[0] == irq_number) + wakeup_irq[0] = wakeup_irq[1]; + else + wakeup_irq[0] = 0; + + wakeup_irq[1] = 0; + + raw_spin_unlock_irq(&wakeup_irq_lock); + + if (!irq_number) atomic_set(&pm_abort_suspend, 0); } void pm_system_irq_wakeup(unsigned int irq_number) { - if (pm_wakeup_irq == 0) { - pm_wakeup_irq = irq_number; + unsigned long flags; + + raw_spin_lock_irqsave(&wakeup_irq_lock, flags); + + if (wakeup_irq[0] == 0) + wakeup_irq[0] = irq_number; + else if (wakeup_irq[1] == 0) + wakeup_irq[1] = irq_number; + else + irq_number = 0; + + raw_spin_unlock_irqrestore(&wakeup_irq_lock, flags); + + if (irq_number) pm_system_wakeup(); - } +} + +unsigned int pm_wakeup_irq(void) +{ + return wakeup_irq[0]; } /** diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 01cbbfc4e9e24c702b738259cb572af64c600723..150012ffb387f593ec00ae2403a72452bc89579f 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1082,7 +1082,7 @@ out_putf: return error; } -static void __loop_clr_fd(struct loop_device *lo) +static void __loop_clr_fd(struct loop_device *lo, bool release) { struct file *filp; gfp_t gfp = lo->old_gfp_mask; @@ -1144,6 +1144,8 @@ static void __loop_clr_fd(struct loop_device *lo) /* let user-space know about this change */ kobject_uevent(&disk_to_dev(lo->lo_disk)->kobj, KOBJ_CHANGE); mapping_set_gfp_mask(filp->f_mapping, gfp); + /* This is safe: open() is still holding a reference. */ + module_put(THIS_MODULE); blk_mq_unfreeze_queue(lo->lo_queue); disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE); @@ -1151,52 +1153,44 @@ static void __loop_clr_fd(struct loop_device *lo) if (lo->lo_flags & LO_FLAGS_PARTSCAN) { int err; - mutex_lock(&lo->lo_disk->open_mutex); + /* + * open_mutex has been held already in release path, so don't + * acquire it if this function is called in such case. + * + * If the reread partition isn't from release path, lo_refcnt + * must be at least one and it can only become zero when the + * current holder is released. + */ + if (!release) + mutex_lock(&lo->lo_disk->open_mutex); err = bdev_disk_changed(lo->lo_disk, false); - mutex_unlock(&lo->lo_disk->open_mutex); + if (!release) + mutex_unlock(&lo->lo_disk->open_mutex); if (err) pr_warn("%s: partition scan of loop%d failed (rc=%d)\n", __func__, lo->lo_number, err); /* Device is gone, no point in returning error */ } + /* + * lo->lo_state is set to Lo_unbound here after above partscan has + * finished. There cannot be anybody else entering __loop_clr_fd() as + * Lo_rundown state protects us from all the other places trying to + * change the 'lo' device. 
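+ *
+ * Illustrative aside (not part of the patch): after this change the
+ * two callers differ only in who already holds open_mutex --
+ *
+ *	loop_clr_fd():	__loop_clr_fd(lo, false);	// ioctl path, takes the lock
+ *	lo_release():	__loop_clr_fd(lo, true);	// open_mutex already held
+ *
+ * which is why the partition rescan above locks conditionally.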
+ */ lo->lo_flags = 0; if (!part_shift) lo->lo_disk->flags |= GENHD_FL_NO_PART; - - fput(filp); -} - -static void loop_rundown_completed(struct loop_device *lo) -{ mutex_lock(&lo->lo_mutex); lo->lo_state = Lo_unbound; mutex_unlock(&lo->lo_mutex); - module_put(THIS_MODULE); -} - -static void loop_rundown_workfn(struct work_struct *work) -{ - struct loop_device *lo = container_of(work, struct loop_device, - rundown_work); - struct block_device *bdev = lo->lo_device; - struct gendisk *disk = lo->lo_disk; - - __loop_clr_fd(lo); - kobject_put(&bdev->bd_device.kobj); - module_put(disk->fops->owner); - loop_rundown_completed(lo); -} -static void loop_schedule_rundown(struct loop_device *lo) -{ - struct block_device *bdev = lo->lo_device; - struct gendisk *disk = lo->lo_disk; - - __module_get(disk->fops->owner); - kobject_get(&bdev->bd_device.kobj); - INIT_WORK(&lo->rundown_work, loop_rundown_workfn); - queue_work(system_long_wq, &lo->rundown_work); + /* + * Need not hold lo_mutex to fput backing file. Calling fput holding + * lo_mutex triggers a circular lock dependency possibility warning as + * fput can take open_mutex which is usually taken before lo_mutex. + */ + fput(filp); } static int loop_clr_fd(struct loop_device *lo) @@ -1228,8 +1222,7 @@ static int loop_clr_fd(struct loop_device *lo) lo->lo_state = Lo_rundown; mutex_unlock(&lo->lo_mutex); - __loop_clr_fd(lo); - loop_rundown_completed(lo); + __loop_clr_fd(lo, false); return 0; } @@ -1754,7 +1747,7 @@ static void lo_release(struct gendisk *disk, fmode_t mode) * In autoclear mode, stop the loop thread * and remove configuration after last close. */ - loop_schedule_rundown(lo); + __loop_clr_fd(lo, true); return; } else if (lo->lo_state == Lo_bound) { /* diff --git a/drivers/block/loop.h b/drivers/block/loop.h index 918a7a2dc025933a50dfe1ef8bc420b51639a9c0..082d4b6bfc6a6cbe789fa2c6361ae3a88a8b3e27 100644 --- a/drivers/block/loop.h +++ b/drivers/block/loop.h @@ -56,7 +56,6 @@ struct loop_device { struct gendisk *lo_disk; struct mutex lo_mutex; bool idr_visible; - struct work_struct rundown_work; }; struct loop_cmd { diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 8f8443ee6fe4609269e9eeb7b9e6d4f16d3b9e70..c08971de369fc7a024306632e2695f54f0759ae7 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -196,7 +196,7 @@ rnbd_get_cpu_qlist(struct rnbd_clt_session *sess, int cpu) return per_cpu_ptr(sess->cpu_queues, bit); } else if (cpu != 0) { /* Search from 0 to cpu */ - bit = find_next_bit(sess->cpu_queues_bm, cpu, 0); + bit = find_first_bit(sess->cpu_queues_bm, cpu); if (bit < cpu) return per_cpu_ptr(sess->cpu_queues, bit); } diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c index 3a258a677df855ec4c7c01cd4d028d6fe22306dd..b79895810c52f8fc04d9ab4fcc063a012064d5f0 100644 --- a/drivers/bus/mhi/pci_generic.c +++ b/drivers/bus/mhi/pci_generic.c @@ -366,6 +366,7 @@ static const struct mhi_pci_dev_info mhi_foxconn_sdx55_info = { .config = &modem_foxconn_sdx55_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, .dma_data_width = 32, + .mru_default = 32768, .sideband_wake = false, }; @@ -401,6 +402,7 @@ static const struct mhi_pci_dev_info mhi_mv31_info = { .config = &modem_mv31_config, .bar_num = MHI_PCI_DEFAULT_BAR_NUM, .dma_data_width = 32, + .mru_default = 32768, }; static const struct mhi_channel_config mhi_sierra_em919x_channels[] = { diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 
9877e413fce39da5d335d2a9c8678d32f22e4368..1b57d4666e43c3b7de50f301b4e0d380f55fa067 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -3691,27 +3691,6 @@ static struct ctl_table cdrom_table[] = { }, { } }; - -static struct ctl_table cdrom_cdrom_table[] = { - { - .procname = "cdrom", - .maxlen = 0, - .mode = 0555, - .child = cdrom_table, - }, - { } -}; - -/* Make sure that /proc/sys/dev is there */ -static struct ctl_table cdrom_root_table[] = { - { - .procname = "dev", - .maxlen = 0, - .mode = 0555, - .child = cdrom_cdrom_table, - }, - { } -}; static struct ctl_table_header *cdrom_sysctl_header; static void cdrom_sysctl_register(void) @@ -3721,7 +3700,7 @@ static void cdrom_sysctl_register(void) if (!atomic_add_unless(&initialized, 1, 1)) return; - cdrom_sysctl_header = register_sysctl_table(cdrom_root_table); + cdrom_sysctl_header = register_sysctl("dev/cdrom", cdrom_table); /* set the defaults */ cdrom_sysctl_settings.autoclose = autoclose; diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 4e5431f01450bb0de81d0709f626e2df460abe6a..563dfae3b8dafec6af54eae661b27525b5a3d945 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -746,26 +746,6 @@ static struct ctl_table hpet_table[] = { {} }; -static struct ctl_table hpet_root[] = { - { - .procname = "hpet", - .maxlen = 0, - .mode = 0555, - .child = hpet_table, - }, - {} -}; - -static struct ctl_table dev_root[] = { - { - .procname = "dev", - .maxlen = 0, - .mode = 0555, - .child = hpet_root, - }, - {} -}; - static struct ctl_table_header *sysctl_header; /* @@ -1061,7 +1041,7 @@ static int __init hpet_init(void) if (result < 0) return -ENODEV; - sysctl_header = register_sysctl_table(dev_root); + sysctl_header = register_sysctl("dev/hpet", hpet_table); result = acpi_bus_register_driver(&hpet_acpi_driver); if (result < 0) { diff --git a/drivers/char/random.c b/drivers/char/random.c index b411182df6f614d25148323214bc0ccbf9b7dcf7..3404a91edf29296d53fad711804fd9c82783d017 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -762,7 +762,7 @@ static bool crng_init_try_arch(struct crng_state *crng) return arch_init; } -static bool __init crng_init_try_arch_early(struct crng_state *crng) +static bool __init crng_init_try_arch_early(void) { int i; bool arch_init = true; @@ -774,7 +774,7 @@ static bool __init crng_init_try_arch_early(struct crng_state *crng) rv = random_get_entropy(); arch_init = false; } - crng->state[i] ^= rv; + primary_crng.state[i] ^= rv; } return arch_init; @@ -788,22 +788,20 @@ static void crng_initialize_secondary(struct crng_state *crng) crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } -static void __init crng_initialize_primary(struct crng_state *crng) +static void __init crng_initialize_primary(void) { - _extract_entropy(&crng->state[4], sizeof(u32) * 12); - if (crng_init_try_arch_early(crng) && trust_cpu && crng_init < 2) { + _extract_entropy(&primary_crng.state[4], sizeof(u32) * 12); + if (crng_init_try_arch_early() && trust_cpu && crng_init < 2) { invalidate_batched_entropy(); numa_crng_init(); crng_init = 2; pr_notice("crng init done (trusting CPU's manufacturer)\n"); } - crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; + primary_crng.init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } -static void crng_finalize_init(struct crng_state *crng) +static void crng_finalize_init(void) { - if (crng != &primary_crng || crng_init >= 2) - return; if (!system_wq) { /* We can't call numa_crng_init until we have workqueues, * so mark this for processing later. 
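 *
 * Illustrative aside (not part of the patch): "later" is
 * rand_initialize(), which re-checks the flag once workqueues are
 * available:
 *
 *	if (crng_need_final_init)
 *		crng_finalize_init();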
*/ @@ -814,6 +812,7 @@ static void crng_finalize_init(struct crng_state *crng) invalidate_batched_entropy(); numa_crng_init(); crng_init = 2; + crng_need_final_init = false; process_random_ready_list(); wake_up_interruptible(&crng_init_wait); kill_fasync(&fasync, SIGIO, POLL_IN); @@ -980,7 +979,8 @@ static void crng_reseed(struct crng_state *crng, bool use_input_pool) memzero_explicit(&buf, sizeof(buf)); WRITE_ONCE(crng->init_time, jiffies); spin_unlock_irqrestore(&crng->lock, flags); - crng_finalize_init(crng); + if (crng == &primary_crng && crng_init < 2) + crng_finalize_init(); } static void _extract_crng(struct crng_state *crng, u8 out[CHACHA_BLOCK_SIZE]) @@ -1697,8 +1697,8 @@ int __init rand_initialize(void) { init_std_data(); if (crng_need_final_init) - crng_finalize_init(&primary_crng); - crng_initialize_primary(&primary_crng); + crng_finalize_init(); + crng_initialize_primary(); crng_global_init_time = jiffies; if (ratelimit_disable) { urandom_warning.interval = 0; @@ -1856,7 +1856,10 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - input_pool.entropy_count = 0; + if (xchg(&input_pool.entropy_count, 0) && random_write_wakeup_bits) { + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + } return 0; case RNDRESEEDCRNG: if (!capable(CAP_SYS_ADMIN)) @@ -1992,8 +1995,7 @@ static int proc_do_entropy(struct ctl_table *table, int write, void *buffer, } static int sysctl_poolsize = POOL_BITS; -extern struct ctl_table random_table[]; -struct ctl_table random_table[] = { +static struct ctl_table random_table[] = { { .procname = "poolsize", .data = &sysctl_poolsize, @@ -2055,6 +2057,17 @@ struct ctl_table random_table[] = { #endif { } }; + +/* + * rand_initialize() is called before sysctl_init(), + * so we cannot call register_sysctl_init() in rand_initialize() + */ +static int __init random_sysctls_init(void) +{ + register_sysctl_init("kernel/random", random_table); + return 0; +} +device_initcall(random_sysctls_init); #endif /* CONFIG_SYSCTL */ struct batched_entropy { @@ -2195,13 +2208,15 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, return; } - /* Suspend writing if we're above the trickle threshold. + /* Throttle writing if we're above the trickle threshold. * We'll be woken up again once below random_write_wakeup_thresh, - * or when the calling thread is about to terminate. + * when the calling thread is about to terminate, or once + * CRNG_RESEED_INTERVAL has lapsed. 
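+ *
+ * Illustrative aside (not part of the patch):
+ * wait_event_interruptible_timeout() returns 0 on timeout, so after at
+ * most CRNG_RESEED_INTERVAL jiffies the hwrng kthread falls through to
+ * mix_pool_bytes() even with a full pool -- a bounded trickle of fresh
+ * hardware entropy rather than a thread parked indefinitely:
+ *
+ *	// ret == 0 means timed out; mixing below runs regardless
+ *	ret = wait_event_interruptible_timeout(wq, cond, CRNG_RESEED_INTERVAL);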
*/ - wait_event_interruptible(random_write_wait, + wait_event_interruptible_timeout(random_write_wait, !system_wq || kthread_should_stop() || - POOL_ENTROPY_BITS() <= random_write_wakeup_bits); + POOL_ENTROPY_BITS() <= random_write_wakeup_bits, + CRNG_RESEED_INTERVAL); mix_pool_bytes(buffer, count); credit_entropy_bits(entropy); } diff --git a/drivers/clocksource/timer-ti-dm-systimer.c b/drivers/clocksource/timer-ti-dm-systimer.c index b6f97960d8ee020f908dbd260b6c04c188c0b4d8..5c40ca1d4740e7f67f4a674dd08a433f78488ffc 100644 --- a/drivers/clocksource/timer-ti-dm-systimer.c +++ b/drivers/clocksource/timer-ti-dm-systimer.c @@ -241,7 +241,7 @@ static void __init dmtimer_systimer_assign_alwon(void) bool quirk_unreliable_oscillator = false; /* Quirk unreliable 32 KiHz oscillator with incomplete dts */ - if (of_machine_is_compatible("ti,omap3-beagle") || + if (of_machine_is_compatible("ti,omap3-beagle-ab4") || of_machine_is_compatible("timll,omap3-devkit8000")) { quirk_unreliable_oscillator = true; counter_32k = -ENODEV; diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 646ad385e490432441514bfc818950822ce44631..ccac1c453080b202b1161b0ec3b35ffbec102129 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -358,7 +358,7 @@ static void cn_proc_mcast_ctl(struct cn_msg *msg, * other namespaces. */ if ((current_user_ns() != &init_user_ns) || - (task_active_pid_ns(current) != &init_pid_ns)) + !task_is_in_init_pid_ns(current)) return; /* Can only change if privileged. */ diff --git a/drivers/counter/counter-core.c b/drivers/counter/counter-core.c index 7e0957eea094672eaf7b9934a6db7852d86c802e..869894b747413cf1d37907ba61ae59215cf2b878 100644 --- a/drivers/counter/counter-core.c +++ b/drivers/counter/counter-core.c @@ -90,10 +90,8 @@ struct counter_device *counter_alloc(size_t sizeof_priv) int err; ch = kzalloc(sizeof(*ch) + sizeof_priv, GFP_KERNEL); - if (!ch) { - err = -ENOMEM; - goto err_alloc_ch; - } + if (!ch) + return NULL; counter = &ch->counter; dev = &counter->dev; @@ -123,9 +121,8 @@ err_chrdev_add: err_ida_alloc: kfree(ch); -err_alloc_ch: - return ERR_PTR(err); + return NULL; } EXPORT_SYMBOL_GPL(counter_alloc); @@ -208,12 +205,12 @@ struct counter_device *devm_counter_alloc(struct device *dev, size_t sizeof_priv int err; counter = counter_alloc(sizeof_priv); - if (IS_ERR(counter)) - return counter; + if (!counter) + return NULL; err = devm_add_action_or_reset(dev, devm_counter_put, counter); if (err < 0) - return ERR_PTR(err); + return NULL; return counter; } diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c index 4c8ebdf671ca8c3a02cc975131583dfac13fdb36..1b4d425bbf0e48ea08ebf80c2bd0acfca4c2f06f 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c @@ -1753,7 +1753,6 @@ void otx2_cpt_print_uc_dbg_info(struct otx2_cptpf_dev *cptpf) char engs_info[2 * OTX2_CPT_NAME_LENGTH]; struct otx2_cpt_eng_grp_info *grp; struct otx2_cpt_engs_rsvd *engs; - u32 mask[4]; int i, j; pr_debug("Engine groups global info"); @@ -1785,6 +1784,8 @@ void otx2_cpt_print_uc_dbg_info(struct otx2_cptpf_dev *cptpf) for (j = 0; j < OTX2_CPT_MAX_ETYPES_PER_GRP; j++) { engs = &grp->engs[j]; if (engs->type) { + u32 mask[5] = { }; + get_engs_info(grp, engs_info, 2 * OTX2_CPT_NAME_LENGTH, j); pr_debug("Slot%d: %s", j, engs_info); diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c index 
56bf5ad01ad54e40c297a4697a2b7f3aea1058cb..8f5848aa144fec35a2de2ea68558c27dc3083669 100644 --- a/drivers/dma-buf/dma-heap.c +++ b/drivers/dma-buf/dma-heap.c @@ -14,6 +14,7 @@ #include #include #include +#include <linux/nospec.h> #include #include #include @@ -135,6 +136,7 @@ static long dma_heap_ioctl(struct file *file, unsigned int ucmd, if (nr >= ARRAY_SIZE(dma_heap_ioctl_cmds)) return -EINVAL; + nr = array_index_nospec(nr, ARRAY_SIZE(dma_heap_ioctl_cmds)); /* Get the kernel ioctl cmd that matches */ kcmd = dma_heap_ioctl_cmds[nr]; diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 3a6d2416cb0f634689dfe52749a27c9778ab0bcd..5dd29789f97d3d5d8411ea306c8cbcb317ac9f07 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -350,7 +350,7 @@ static int altr_sdram_probe(struct platform_device *pdev) if (irq < 0) { edac_printk(KERN_ERR, EDAC_MC, "No irq %d in DT\n", irq); - return -ENODEV; + return irq; } /* Arria10 has a 2nd IRQ */ diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c index 2ccd1db5e98ff0bcc9cae1db0b3a74f3fa434841..7197f9fa024572f72aec324af64cd382820ffac1 100644 --- a/drivers/edac/xgene_edac.c +++ b/drivers/edac/xgene_edac.c @@ -1919,7 +1919,7 @@ static int xgene_edac_probe(struct platform_device *pdev) irq = platform_get_irq_optional(pdev, i); if (irq < 0) { dev_err(&pdev->dev, "No IRQ resource\n"); - rc = -EINVAL; + rc = irq; goto out_err; } rc = devm_request_irq(&pdev->dev, irq, diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index ae79c33001297aba6fbf6f6b4dc572b6dc317702..7de3f5b6e8d0acbd93de793c87db10d442a9d250 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -722,6 +722,13 @@ void __init efi_systab_report_header(const efi_table_hdr_t *systab_hdr, systab_hdr->revision >> 16, systab_hdr->revision & 0xffff, vendor); + + if (IS_ENABLED(CONFIG_X86_64) && + systab_hdr->revision > EFI_1_10_SYSTEM_TABLE_REVISION && + !strcmp(vendor, "Apple")) { + pr_info("Apple Mac detected, using EFI v1.10 runtime services only\n"); + efi.runtime_version = EFI_1_10_SYSTEM_TABLE_REVISION; + } } static __initdata char memory_type_name[][13] = { diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 2363fee9211c98d597a6863cec5771bcd8058261..9cc556013d085991a4825643de486547069e35e2 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -119,9 +119,9 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, if (image->image_base != _text) efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n"); - if (!IS_ALIGNED((u64)_text, EFI_KIMG_ALIGN)) - efi_err("FIRMWARE BUG: kernel image not aligned on %ldk boundary\n", - EFI_KIMG_ALIGN >> 10); + if (!IS_ALIGNED((u64)_text, SEGMENT_ALIGN)) + efi_err("FIRMWARE BUG: kernel image not aligned on %dk boundary\n", + SEGMENT_ALIGN >> 10); kernel_size = _edata - _text; kernel_memsize = kernel_size + (_end - _edata); diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c index e87e7f1b1a33a40d811b9ee00f71e9d0625e5afa..da93864d7abcd104495e653b7d3079f31c1b7ed0 100644 --- a/drivers/firmware/efi/libstub/efi-stub.c +++ b/drivers/firmware/efi/libstub/efi-stub.c @@ -40,6 +40,8 @@ #ifdef CONFIG_ARM64 # define EFI_RT_VIRTUAL_LIMIT DEFAULT_MAP_WINDOW_64 +#elif defined(CONFIG_RISCV) +# define EFI_RT_VIRTUAL_LIMIT TASK_SIZE_MIN #else # define EFI_RT_VIRTUAL_LIMIT TASK_SIZE #endif diff --git a/drivers/gpio/gpio-aggregator.c
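The dma-heap change above is the stock Spectre-v1 hardening pattern: a bounds check alone does not stop the CPU from speculating past the branch, so the index is additionally clamped with array_index_nospec() from <linux/nospec.h> before it selects a table entry. A sketch of the idiom with a hypothetical two-entry handler table:

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/nospec.h>

static long op_alloc(void) { return 0; }
static long op_free(void)  { return 0; }
static long (*const handlers[])(void) = { op_alloc, op_free };

static long dispatch(unsigned int nr)
{
	if (nr >= ARRAY_SIZE(handlers))
		return -EINVAL;
	/* Clamp nr under speculation: after a mispredicted bounds check
	 * the load below would otherwise read out of bounds and leak the
	 * value through a cache side channel. */
	nr = array_index_nospec(nr, ARRAY_SIZE(handlers));
	return handlers[nr]();
}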
b/drivers/gpio/gpio-aggregator.c index 869dc952cf45218b7cc7c295f31ea730aaab836a..0cb2664085cf83144a5a78253b255653d5a6a03a 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -278,7 +278,8 @@ static int gpio_fwd_get(struct gpio_chip *chip, unsigned int offset) { struct gpiochip_fwd *fwd = gpiochip_get_data(chip); - return gpiod_get_value(fwd->descs[offset]); + return chip->can_sleep ? gpiod_get_value_cansleep(fwd->descs[offset]) + : gpiod_get_value(fwd->descs[offset]); } static int gpio_fwd_get_multiple(struct gpiochip_fwd *fwd, unsigned long *mask, @@ -293,7 +294,10 @@ static int gpio_fwd_get_multiple(struct gpiochip_fwd *fwd, unsigned long *mask, for_each_set_bit(i, mask, fwd->chip.ngpio) descs[j++] = fwd->descs[i]; - error = gpiod_get_array_value(j, descs, NULL, values); + if (fwd->chip.can_sleep) + error = gpiod_get_array_value_cansleep(j, descs, NULL, values); + else + error = gpiod_get_array_value(j, descs, NULL, values); if (error) return error; @@ -328,7 +332,10 @@ static void gpio_fwd_set(struct gpio_chip *chip, unsigned int offset, int value) { struct gpiochip_fwd *fwd = gpiochip_get_data(chip); - gpiod_set_value(fwd->descs[offset], value); + if (chip->can_sleep) + gpiod_set_value_cansleep(fwd->descs[offset], value); + else + gpiod_set_value(fwd->descs[offset], value); } static void gpio_fwd_set_multiple(struct gpiochip_fwd *fwd, unsigned long *mask, @@ -343,7 +350,10 @@ static void gpio_fwd_set_multiple(struct gpiochip_fwd *fwd, unsigned long *mask, descs[j++] = fwd->descs[i]; } - gpiod_set_array_value(j, descs, NULL, values); + if (fwd->chip.can_sleep) + gpiod_set_array_value_cansleep(j, descs, NULL, values); + else + gpiod_set_array_value(j, descs, NULL, values); } static void gpio_fwd_set_multiple_locked(struct gpio_chip *chip, diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c index 403f9e833d6a3509ae25ebc0a595c32dc6a48cac..7d82388b4ab7c4bde29f81d4d23da99f241d1991 100644 --- a/drivers/gpio/gpio-sifive.c +++ b/drivers/gpio/gpio-sifive.c @@ -223,7 +223,7 @@ static int sifive_gpio_probe(struct platform_device *pdev) NULL, chip->base + SIFIVE_GPIO_OUTPUT_EN, chip->base + SIFIVE_GPIO_INPUT_EN, - 0); + BGPIOF_READ_OUTPUT_REG_SET); if (ret) { dev_err(dev, "unable to init generic GPIO\n"); return ret; diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index 838bbfed11d3561bac551aeb5e2e2752d7650e4e..153fe79e1bf3e7d113c62d783ba86f9815a2247b 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -570,6 +570,11 @@ static struct gpio_sim_bank *to_gpio_sim_bank(struct config_item *item) return container_of(group, struct gpio_sim_bank, group); } +static bool gpio_sim_bank_has_label(struct gpio_sim_bank *bank) +{ + return bank->label && *bank->label; +} + static struct gpio_sim_device * gpio_sim_bank_get_device(struct gpio_sim_bank *bank) { @@ -770,9 +775,15 @@ static int gpio_sim_add_hogs(struct gpio_sim_device *dev) * point the device doesn't exist yet and so dev_name() * is not available. 
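All of the gpio-aggregator hunks above apply the same rule: a forwarding chip whose backing lines may sleep (chip->can_sleep set, e.g. lines on an I2C expander) must use the gpiod_*_cansleep() accessors, while a chip serving atomic contexts must use the non-sleeping variants. A condensed sketch of that dispatch for the single-line get path (names shortened for illustration):

#include <linux/gpio/consumer.h>
#include <linux/gpio/driver.h>

static int fwd_get(struct gpio_chip *chip, unsigned int offset)
{
	struct gpio_desc **descs = gpiochip_get_data(chip);

	/* A can_sleep chip is only ever called from sleepable context,
	 * so the blocking accessor is both legal and required there;
	 * the non-sleeping accessor would warn on a sleeping line. */
	return chip->can_sleep ? gpiod_get_value_cansleep(descs[offset])
			       : gpiod_get_value(descs[offset]);
}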
*/ - hog->chip_label = kasprintf(GFP_KERNEL, - "gpio-sim.%u-%s", dev->id, - fwnode_get_name(bank->swnode)); + if (gpio_sim_bank_has_label(bank)) + hog->chip_label = kstrdup(bank->label, + GFP_KERNEL); + else + hog->chip_label = kasprintf(GFP_KERNEL, + "gpio-sim.%u-%s", + dev->id, + fwnode_get_name( + bank->swnode)); if (!hog->chip_label) { gpio_sim_remove_hogs(dev); return -ENOMEM; @@ -816,7 +827,7 @@ gpio_sim_make_bank_swnode(struct gpio_sim_bank *bank, properties[prop_idx++] = PROPERTY_ENTRY_U32("ngpios", bank->num_lines); - if (bank->label) + if (gpio_sim_bank_has_label(bank)) properties[prop_idx++] = PROPERTY_ENTRY_STRING("gpio-sim,label", bank->label); diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index c7b5446d01fd2363fa65c9d681cfd52bcfe042e7..ffa0256cad5a0b83241f43959cbce11d32a8c23f 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -330,7 +330,7 @@ static int linehandle_create(struct gpio_device *gdev, void __user *ip) goto out_free_lh; } - ret = gpiod_request(desc, lh->label); + ret = gpiod_request_user(desc, lh->label); if (ret) goto out_free_lh; lh->descs[i] = desc; @@ -1378,7 +1378,7 @@ static int linereq_create(struct gpio_device *gdev, void __user *ip) goto out_free_linereq; } - ret = gpiod_request(desc, lr->label); + ret = gpiod_request_user(desc, lr->label); if (ret) goto out_free_linereq; @@ -1764,7 +1764,7 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip) } } - ret = gpiod_request(desc, le->label); + ret = gpiod_request_user(desc, le->label); if (ret) goto out_free_le; le->desc = desc; diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c index 4098bc7f88b7e244db4e2b7f4d9b19e24e4512e7..44c1ad51b3fe9088d7235d50540574e1411a2674 100644 --- a/drivers/gpio/gpiolib-sysfs.c +++ b/drivers/gpio/gpiolib-sysfs.c @@ -475,12 +475,9 @@ static ssize_t export_store(struct class *class, * they may be undone on its behalf too. 
*/ - status = gpiod_request(desc, "sysfs"); - if (status) { - if (status == -EPROBE_DEFER) - status = -ENODEV; + status = gpiod_request_user(desc, "sysfs"); + if (status) goto done; - } status = gpiod_set_transitory(desc, false); if (!status) { diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index 30bc3f80f83e6c2a71bdb47bb9d054f9011ed175..c31f4626915de3936c1daf89c559738eb475040a 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -135,6 +135,18 @@ struct gpio_desc { int gpiod_request(struct gpio_desc *desc, const char *label); void gpiod_free(struct gpio_desc *desc); + +static inline int gpiod_request_user(struct gpio_desc *desc, const char *label) +{ + int ret; + + ret = gpiod_request(desc, label); + if (ret == -EPROBE_DEFER) + ret = -ENODEV; + + return ret; +} + int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id, unsigned long lflags, enum gpiod_flags dflags); int gpio_set_debounce_timeout(struct gpio_desc *desc, unsigned int debounce); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index d8b854fcbffa7dc8a4925bf90eabab4475f04696..9a53a4de2bb7c4c461ecdd8d18559da834052b90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1408,12 +1408,10 @@ int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_sta int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev); void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps); -bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); void amdgpu_acpi_detect(void); #else static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; } static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { } -static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } static inline void amdgpu_acpi_detect(void) { } static inline bool amdgpu_acpi_is_power_shift_control_supported(void) { return false; } static inline int amdgpu_acpi_power_shift_control(struct amdgpu_device *adev, @@ -1422,6 +1420,14 @@ static inline int amdgpu_acpi_smart_shift_update(struct drm_device *dev, enum amdgpu_ss ss_state) { return 0; } #endif +#if defined(CONFIG_ACPI) && defined(CONFIG_SUSPEND) +bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev); +bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev); +#else +static inline bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { return false; } +static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return false; } +#endif + int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, uint64_t addr, struct amdgpu_bo **bo, struct amdgpu_bo_va_mapping **mapping); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 4811b0faafd9ad62e24edaf3d281182890440103..0e12315fa0cb87c643426bda0fa3a8f09feb3820 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -1031,6 +1031,20 @@ void amdgpu_acpi_detect(void) } } +#if IS_ENABLED(CONFIG_SUSPEND) +/** + * amdgpu_acpi_is_s3_active + * + * @adev: amdgpu_device_pointer + * + * returns true if supported, false if not. 
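The gpiod_request_user() helper added to gpiolib.h centralizes a rule the sysfs hunk above had open-coded: -EPROBE_DEFER is an in-kernel "retry the probe later" code that is meaningless to userspace, so user-originated line requests report -ENODEV instead. Spelled out, this is the equivalent open-coded form each call site used to need (gpiod_request() is gpiolib-internal):

	status = gpiod_request(desc, label);
	if (status == -EPROBE_DEFER)
		status = -ENODEV;	/* userspace cannot retry a deferred probe */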
+ */ +bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) +{ + return !(adev->flags & AMD_IS_APU) || + (pm_suspend_target_state == PM_SUSPEND_MEM); +} + /** * amdgpu_acpi_is_s0ix_active * @@ -1040,11 +1054,24 @@ void amdgpu_acpi_detect(void) */ bool amdgpu_acpi_is_s0ix_active(struct amdgpu_device *adev) { -#if IS_ENABLED(CONFIG_AMD_PMC) && IS_ENABLED(CONFIG_SUSPEND) - if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) { - if (adev->flags & AMD_IS_APU) - return pm_suspend_target_state == PM_SUSPEND_TO_IDLE; + if (!(adev->flags & AMD_IS_APU) || + (pm_suspend_target_state != PM_SUSPEND_TO_IDLE)) + return false; + + if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0)) { + dev_warn_once(adev->dev, + "Power consumption will be higher as BIOS has not been configured for suspend-to-idle.\n" + "To use suspend-to-idle change the sleep mode in BIOS setup.\n"); + return false; } -#endif + +#if !IS_ENABLED(CONFIG_AMD_PMC) + dev_warn_once(adev->dev, + "Power consumption will be higher as the kernel has not been compiled with CONFIG_AMD_PMC.\n"); return false; +#else + return true; +#endif /* CONFIG_AMD_PMC */ } + +#endif /* CONFIG_SUSPEND */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b21bcdc97460779ef53b83521a8e19188b60f16a..63a08999264517f98655bfa18f3fab1f0f3ac74c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1525,6 +1525,87 @@ static const u16 amdgpu_unsupported_pciidlist[] = { 0x99A0, 0x99A2, 0x99A4, + /* radeon secondary ids */ + 0x3171, + 0x3e70, + 0x4164, + 0x4165, + 0x4166, + 0x4168, + 0x4170, + 0x4171, + 0x4172, + 0x4173, + 0x496e, + 0x4a69, + 0x4a6a, + 0x4a6b, + 0x4a70, + 0x4a74, + 0x4b69, + 0x4b6b, + 0x4b6c, + 0x4c6e, + 0x4e64, + 0x4e65, + 0x4e66, + 0x4e67, + 0x4e68, + 0x4e69, + 0x4e6a, + 0x4e71, + 0x4f73, + 0x5569, + 0x556b, + 0x556d, + 0x556f, + 0x5571, + 0x5854, + 0x5874, + 0x5940, + 0x5941, + 0x5b72, + 0x5b73, + 0x5b74, + 0x5b75, + 0x5d44, + 0x5d45, + 0x5d6d, + 0x5d6f, + 0x5d72, + 0x5d77, + 0x5e6b, + 0x5e6d, + 0x7120, + 0x7124, + 0x7129, + 0x712e, + 0x712f, + 0x7162, + 0x7163, + 0x7166, + 0x7167, + 0x7172, + 0x7173, + 0x71a0, + 0x71a1, + 0x71a3, + 0x71a7, + 0x71bb, + 0x71e0, + 0x71e1, + 0x71e2, + 0x71e6, + 0x71e7, + 0x71f2, + 0x7269, + 0x726b, + 0x726e, + 0x72a0, + 0x72a8, + 0x72b1, + 0x72b3, + 0x793f, }; static const struct pci_device_id pciidlist[] = { @@ -2165,13 +2246,20 @@ static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work) static int amdgpu_pmops_prepare(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); /* Return a positive number here so * DPM_FLAG_SMART_SUSPEND works properly */ if (amdgpu_device_supports_boco(drm_dev)) - return pm_runtime_suspended(dev) && - pm_suspend_via_firmware(); + return pm_runtime_suspended(dev); + + /* if we will not support s3 or s2i for the device + * then skip suspend + */ + if (!amdgpu_acpi_is_s0ix_active(adev) && + !amdgpu_acpi_is_s3_active(adev)) + return 1; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 5c3f24069f2a6d685624c4dbadbcbcd218f5f2b9..4655702a5e007bd5086e11eca8f7876542f32e38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1904,7 +1904,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, unsigned i; int r; - if (direct_submit && !ring->sched.ready) { + if (!direct_submit && 
!ring->sched.ready) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index b4eddf6e98a6a23c861af2d4999d0c220d4a1e06..ff738e9725ee8fb15b1e18d6cb0b802c182c5cf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -543,7 +543,9 @@ static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev) adev->gfx.config.max_sh_per_se * adev->gfx.config.max_shader_engines); - if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) { + switch (adev->ip_versions[GC_HWIP][0]) { + case IP_VERSION(10, 3, 1): + case IP_VERSION(10, 3, 3): /* Get SA disabled bitmap from eFuse setting */ efuse_setting = RREG32_SOC15(GC, 0, mmCC_GC_SA_UNIT_DISABLE); efuse_setting &= CC_GC_SA_UNIT_DISABLE__SA_DISABLE_MASK; @@ -566,6 +568,9 @@ static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev) disabled_sa = tmp; WREG32_SOC15(GC, 0, mmGCUTCL2_HARVEST_BYPASS_GROUPS_YELLOW_CARP, disabled_sa); + break; + default: + break; } } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 38bb42727715d26948e4cee9dde57506a2832b68..a2f8ed0e6a644c566efd7172b154c2d30b25227a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -1140,6 +1140,9 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u32 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 3)) + return; + adev->mmhub.funcs->get_clockgating(adev, flags); if (adev->ip_versions[ATHUB_HWIP][0] >= IP_VERSION(2, 1, 0)) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7f9773f8dab66543f21304c81db90adbd29f686a..7c1c623ba79930a6fa4779d93287e8f57b96adfa 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -3653,7 +3653,7 @@ static int dcn10_register_irq_handlers(struct amdgpu_device *adev) /* Use GRPH_PFLIP interrupt */ for (i = DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT; - i <= DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT + adev->mode_info.num_crtc - 1; + i <= DCN_1_0__SRCID__HUBP0_FLIP_INTERRUPT + dc->caps.max_otg_num - 1; i++) { r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_DCE, i, &adev->pageflip_irq); if (r) { diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c index ff5bb152ef494f96667cb941a7e1976578910571..e6ef36de082540f1a91985f800db41a16cf9e6b6 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c @@ -2033,10 +2033,10 @@ static void calculate_bandwidth( kfree(surface_type); free_tiling_mode: kfree(tiling_mode); -free_yclk: - kfree(yclk); free_sclk: kfree(sclk); +free_yclk: + kfree(yclk); } /******************************************************************************* diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c index ec19678a070213d85322cfdff065c4e0eb7f665a..e447c74be713826183e4619093fc791a84436f22 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dcn_calcs.c @@ -503,7 +503,6 @@ static void dcn_bw_calc_rq_dlg_ttu( //input[in_idx].dout.output_standard; /*todo: soc->sr_enter_plus_exit_time??*/ - dlg_sys_param->t_srx_delay_us = 
dc->dcn_ip->dcfclk_cstate_latency / v->dcf_clk_deep_sleep; dml1_rq_dlg_get_rq_params(dml, rq_param, &input->pipe.src); dml1_extract_rq_regs(dml, rq_regs, rq_param); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c index 48005def11645cf606b198b3efeb48d730f88c54..bc4ddc36fe58b4bb797adfe5a459fb211e611ffd 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn301/vg_clk_mgr.c @@ -570,32 +570,32 @@ static struct wm_table lpddr5_wm_table = { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 7.95, - .sr_enter_plus_exit_time_us = 9, + .sr_exit_time_us = 13.5, + .sr_enter_plus_exit_time_us = 16.5, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.82, - .sr_enter_plus_exit_time_us = 11.196, + .sr_exit_time_us = 13.5, + .sr_enter_plus_exit_time_us = 16.5, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.89, - .sr_enter_plus_exit_time_us = 11.24, + .sr_exit_time_us = 13.5, + .sr_enter_plus_exit_time_us = 16.5, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.65333, - .sr_exit_time_us = 9.748, - .sr_enter_plus_exit_time_us = 11.102, + .sr_exit_time_us = 13.5, + .sr_enter_plus_exit_time_us = 16.5, .valid = true, }, } diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c index 4162ce40089b17d53666f9a110984c0dfba11b55..9d17c5a5ae01b3ebb8f9d8f675a068192e8da755 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c @@ -329,38 +329,38 @@ static struct clk_bw_params dcn31_bw_params = { }; -static struct wm_table ddr4_wm_table = { +static struct wm_table ddr5_wm_table = { .entries = { { .wm_inst = WM_A, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 6.09, - .sr_enter_plus_exit_time_us = 7.14, + .sr_exit_time_us = 9, + .sr_enter_plus_exit_time_us = 11, .valid = true, }, { .wm_inst = WM_B, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, + .sr_exit_time_us = 9, + .sr_enter_plus_exit_time_us = 11, .valid = true, }, { .wm_inst = WM_C, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, + .sr_exit_time_us = 9, + .sr_enter_plus_exit_time_us = 11, .valid = true, }, { .wm_inst = WM_D, .wm_type = WM_TYPE_PSTATE_CHG, .pstate_latency_us = 11.72, - .sr_exit_time_us = 10.12, - .sr_enter_plus_exit_time_us = 11.48, + .sr_exit_time_us = 9, + .sr_enter_plus_exit_time_us = 11, .valid = true, }, } @@ -687,7 +687,7 @@ void dcn31_clk_mgr_construct( if (ctx->dc_bios->integrated_info->memory_type == LpDdr5MemType) { dcn31_bw_params.wm_table = lpddr5_wm_table; } else { - dcn31_bw_params.wm_table = ddr4_wm_table; + dcn31_bw_params.wm_table = ddr5_wm_table; } /* Saved clocks configured at boot for debug purposes */ dcn31_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info); diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c index 
a1011f3273f384cf7f0980a434e36bbce45d117a..de3f4643eeef9d2547bc190a18434a3586a5b92d 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c @@ -120,7 +120,11 @@ static int dcn31_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr, result = dcn31_smu_wait_for_response(clk_mgr, 10, 200000); if (result == VBIOSSMC_Result_Failed) { - ASSERT(0); + if (msg_id == VBIOSSMC_MSG_TransferTableDram2Smu && + param == TABLE_WATERMARKS) + DC_LOG_WARNING("Watermarks table not configured properly by SMU"); + else + ASSERT(0); REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Result_OK); return -1; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 01c8849b9db2becbc70158973b0a00b9841ff97d..d18e9f3ea99832799fd6935278c476571caf1584 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -1220,6 +1220,8 @@ struct dc *dc_create(const struct dc_init_data *init_params) dc->caps.max_dp_protocol_version = DP_VERSION_1_4; + dc->caps.max_otg_num = dc->res_pool->res_cap->num_timing_generator; + if (dc->res_pool->dmcu != NULL) dc->versions.dmcu_version = dc->res_pool->dmcu->dmcu_version; } @@ -1404,20 +1406,34 @@ static void program_timing_sync( status->timing_sync_info.master = false; } - /* remove any other unblanked pipes as they have already been synced */ - for (j = j + 1; j < group_size; j++) { - bool is_blanked; - if (pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked) - is_blanked = - pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked(pipe_set[j]->stream_res.opp); - else - is_blanked = - pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg); - if (!is_blanked) { - group_size--; - pipe_set[j] = pipe_set[group_size]; - j--; + /* remove any other pipes that have already been synced */ + if (dc->config.use_pipe_ctx_sync_logic) { + /* check each pipe's syncd to decide which pipe should be removed */ + for (j = 1; j < group_size; j++) { + if (pipe_set[j]->pipe_idx_syncd == pipe_set[0]->pipe_idx_syncd) { + group_size--; + pipe_set[j] = pipe_set[group_size]; + j--; + } else + /* link slave pipe's syncd with master pipe */ + pipe_set[j]->pipe_idx_syncd = pipe_set[0]->pipe_idx_syncd; + } + } else { + for (j = j + 1; j < group_size; j++) { + bool is_blanked; + + if (pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked) + is_blanked = + pipe_set[j]->stream_res.opp->funcs->dpg_is_blanked(pipe_set[j]->stream_res.opp); + else + is_blanked = + pipe_set[j]->stream_res.tg->funcs->is_blanked(pipe_set[j]->stream_res.tg); + if (!is_blanked) { + group_size--; + pipe_set[j] = pipe_set[group_size]; + j--; + } } } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index 05e216524370d4099211cdd9b4b571da7a410c94..61b8f29a0c303443f9ca03ed699c7626feca0d44 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -202,7 +202,7 @@ void dp_wait_for_training_aux_rd_interval( uint32_t wait_in_micro_secs) { #if defined(CONFIG_DRM_AMD_DC_DCN) - if (wait_in_micro_secs > 16000) + if (wait_in_micro_secs > 1000) msleep(wait_in_micro_secs/1000); else udelay(wait_in_micro_secs); @@ -5597,6 +5597,26 @@ static bool retrieve_link_cap(struct dc_link *link) dp_hw_fw_revision.ieee_fw_rev, sizeof(dp_hw_fw_revision.ieee_fw_rev)); + /* Quirk for Apple MBP 2018 15" Retina panels: wrong DP_MAX_LINK_RATE */ + { + uint8_t str_mbp_2018[] = { 101, 68, 21, 103, 98, 97 };
+ uint8_t fwrev_mbp_2018[] = { 7, 4 }; + uint8_t fwrev_mbp_2018_vega[] = { 8, 4 }; + + /* We also check for the firmware revision as 16,1 models have an + * identical device id and are incorrectly quirked otherwise. */ + if ((link->dpcd_caps.sink_dev_id == 0x0010fa) && + !memcmp(link->dpcd_caps.sink_dev_id_str, str_mbp_2018, + sizeof(str_mbp_2018)) && + (!memcmp(link->dpcd_caps.sink_fw_revision, fwrev_mbp_2018, + sizeof(fwrev_mbp_2018)) || + !memcmp(link->dpcd_caps.sink_fw_revision, fwrev_mbp_2018_vega, + sizeof(fwrev_mbp_2018_vega)))) { + link->reported_link_cap.link_rate = LINK_RATE_RBR2; + } + } + memset(&link->dpcd_caps.dsc_caps, '\0', sizeof(link->dpcd_caps.dsc_caps)); memset(&link->dpcd_caps.fec_cap, '\0', sizeof(link->dpcd_caps.fec_cap)); @@ -6935,7 +6955,7 @@ bool dpcd_write_128b_132b_sst_payload_allocation_table( } } retries++; - udelay(5000); + msleep(5); } if (!result && retries == max_retries) { @@ -6987,7 +7007,7 @@ bool dpcd_poll_for_allocation_change_trigger(struct dc_link *link) break; } - udelay(5000); + msleep(5); } if (result == ACT_FAILED) { diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index d4ff6cc6b8d9c750a73dc86f4c94c5ad25493c7c..b3912ff9dc91135b5aceb37972876a44bba7a42e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -3217,6 +3217,60 @@ struct hpo_dp_link_encoder *resource_get_hpo_dp_link_enc_for_det_lt( } #endif +void reset_syncd_pipes_from_disabled_pipes(struct dc *dc, + struct dc_state *context) +{ + int i, j; + struct pipe_ctx *pipe_ctx_old, *pipe_ctx, *pipe_ctx_syncd; + + /* If pipe backend is reset, need to reset pipe syncd status */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe_ctx_old = &dc->current_state->res_ctx.pipe_ctx[i]; + pipe_ctx = &context->res_ctx.pipe_ctx[i]; + + if (!pipe_ctx_old->stream) + continue; + + if (pipe_ctx_old->top_pipe || pipe_ctx_old->prev_odm_pipe) + continue; + + if (!pipe_ctx->stream || + pipe_need_reprogram(pipe_ctx_old, pipe_ctx)) { + + /* Reset all the syncd pipes from the disabled pipe */ + for (j = 0; j < dc->res_pool->pipe_count; j++) { + pipe_ctx_syncd = &context->res_ctx.pipe_ctx[j]; + if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_syncd) == pipe_ctx_old->pipe_idx) || + !IS_PIPE_SYNCD_VALID(pipe_ctx_syncd)) + SET_PIPE_SYNCD_TO_PIPE(pipe_ctx_syncd, j); + } + } + } +} + +void check_syncd_pipes_for_disabled_master_pipe(struct dc *dc, + struct dc_state *context, + uint8_t disabled_master_pipe_idx) +{ + int i; + struct pipe_ctx *pipe_ctx, *pipe_ctx_check; + + pipe_ctx = &context->res_ctx.pipe_ctx[disabled_master_pipe_idx]; + if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx) != disabled_master_pipe_idx) || + !IS_PIPE_SYNCD_VALID(pipe_ctx)) + SET_PIPE_SYNCD_TO_PIPE(pipe_ctx, disabled_master_pipe_idx); + + /* for the disabled pipe, check if any slave pipe exists and assert */ + for (i = 0; i < dc->res_pool->pipe_count; i++) { + pipe_ctx_check = &context->res_ctx.pipe_ctx[i]; + + if ((GET_PIPE_SYNCD_FROM_PIPE(pipe_ctx_check) == disabled_master_pipe_idx) && + IS_PIPE_SYNCD_VALID(pipe_ctx_check) && (i != disabled_master_pipe_idx)) + DC_ERR("DC: Failure: pipe_idx[%d] syncd with disabled master pipe_idx[%d]\n", + i, disabled_master_pipe_idx); + } +} + uint8_t resource_transmitter_to_phy_idx(const struct dc *dc, enum transmitter transmitter) { /* TODO - get transmitter to phy idx mapping from DMUB */ diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index
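The three macros added to resource.h pack the sync state into the new pipe_idx_syncd byte: bit 7 is a "valid" flag and bits 0-6 carry the master pipe index, so a zero-initialized pipe context naturally reads as "not yet synced". A self-contained illustration of the encoding (stub struct; values hypothetical):

#include <assert.h>
#include <stdint.h>

struct pipe_stub { uint8_t pipe_idx_syncd; };	/* stand-in for struct pipe_ctx */

#define IS_PIPE_SYNCD_VALID(pipe) ((((pipe)->pipe_idx_syncd) & 0x80)?1:0)
#define GET_PIPE_SYNCD_FROM_PIPE(pipe) ((pipe)->pipe_idx_syncd & 0x7F)
#define SET_PIPE_SYNCD_TO_PIPE(pipe, pipe_syncd) ((pipe)->pipe_idx_syncd = (0x80 | pipe_syncd))

int main(void)
{
	struct pipe_stub p = { 0 };

	assert(!IS_PIPE_SYNCD_VALID(&p));		/* zeroed: not yet synced */
	SET_PIPE_SYNCD_TO_PIPE(&p, 3);			/* stores 0x80 | 3 = 0x83 */
	assert(IS_PIPE_SYNCD_VALID(&p));		/* bit 7 now set */
	assert(GET_PIPE_SYNCD_FROM_PIPE(&p) == 3);	/* low 7 bits: master idx */
	return 0;
}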
da2c78ce14d6ce06d5613d54d86fdb4c68c19309..b518648906212678186836b6e4834ab71e5583ad 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -202,6 +202,7 @@ struct dc_caps { bool edp_dsc_support; bool vbios_lttpr_aware; bool vbios_lttpr_enable; + uint32_t max_otg_num; }; struct dc_bug_wa { @@ -344,6 +345,7 @@ struct dc_config { uint8_t vblank_alignment_max_frame_time_diff; bool is_asymmetric_memory; bool is_single_rank_dimm; + bool use_pipe_ctx_sync_logic; }; enum visual_confirm { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 78192ecba102a87a932c58599c44df3548c9e20c..eb2755bdb30ed2144eee8ffc5276088f813de0a1 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1566,6 +1566,10 @@ static enum dc_status apply_single_controller_ctx_to_hw( &pipe_ctx->stream->audio_info); } + /* make sure no pipes are syncd to the pipe being enabled */ + if (!pipe_ctx->stream->apply_seamless_boot_optimization && dc->config.use_pipe_ctx_sync_logic) + check_syncd_pipes_for_disabled_master_pipe(dc, context, pipe_ctx->pipe_idx); + #if defined(CONFIG_DRM_AMD_DC_DCN) /* DCN3.1 FPGA Workaround * Need to enable HPO DP Stream Encoder before setting OTG master enable. @@ -1604,11 +1608,6 @@ static enum dc_status apply_single_controller_ctx_to_hw( pipe_ctx->stream_res.stream_enc, pipe_ctx->stream_res.tg->inst); - if (dc_is_dp_signal(pipe_ctx->stream->signal) && - pipe_ctx->stream_res.stream_enc->funcs->reset_fifo) - pipe_ctx->stream_res.stream_enc->funcs->reset_fifo( - pipe_ctx->stream_res.stream_enc); - if (dc_is_dp_signal(pipe_ctx->stream->signal)) dp_source_sequence_trace(link, DPCD_SOURCE_SEQ_AFTER_CONNECT_DIG_FE_OTG); @@ -1835,9 +1834,29 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context) break; } } - // We are trying to enable eDP, don't power down VDD - if (can_apply_edp_fast_boot) + + /* + * TO-DO: So far the code logic below only addresses the single eDP case. + * For the dual eDP case, there are a few things that need to be + * implemented first: + * + * 1. Change the fastboot logic above, so eDP link[0 or 1]'s + * stream[0 or 1] will all be checked. + * + * 2. Change keep_edp_vdd_on to an array, and maintain keep_edp_vdd_on + * for each eDP. + * + * Once the above 2 things are completed, we can then change the logic below + * correspondingly, so the dual eDP case will be fully covered.
+ */ + + // We are trying to enable eDP, don't power down VDD if an eDP stream already exists + if ((edp_stream_num == 1 && edp_streams[0] != NULL) || can_apply_edp_fast_boot) { keep_edp_vdd_on = true; + DC_LOG_EVENT_LINK_TRAINING("Keep eDP Vdd on\n"); + } else { + DC_LOG_EVENT_LINK_TRAINING("No eDP stream enabled, turn eDP Vdd off\n"); + } } // Check seamless boot support @@ -2297,6 +2316,10 @@ enum dc_status dce110_apply_ctx_to_hw( enum dc_status status; int i; + /* reset syncd pipes from disabled pipes */ + if (dc->config.use_pipe_ctx_sync_logic) + reset_syncd_pipes_from_disabled_pipes(dc, context); + /* Reset old context */ /* look up the targets that have been removed since last commit */ hws->funcs.reset_hw_ctx_wrap(dc, context); diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c index bf4436d7aaab95e8c11c18fa9ca0cd817fa23633..b0c08ee6bc2cb316663bde8e5265a36a479785c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.c @@ -902,19 +902,6 @@ void enc1_stream_encoder_stop_dp_info_packets( } -void enc1_stream_encoder_reset_fifo( - struct stream_encoder *enc) -{ - struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc); - - /* set DIG_START to 0x1 to reset FIFO */ - REG_UPDATE(DIG_FE_CNTL, DIG_START, 1); - udelay(100); - - /* write 0 to take the FIFO out of reset */ - REG_UPDATE(DIG_FE_CNTL, DIG_START, 0); -} - void enc1_stream_encoder_dp_blank( struct dc_link *link, struct stream_encoder *enc) @@ -1600,8 +1587,6 @@ static const struct stream_encoder_funcs dcn10_str_enc_funcs = { enc1_stream_encoder_send_immediate_sdp_message, .stop_dp_info_packets = enc1_stream_encoder_stop_dp_info_packets, - .reset_fifo = - enc1_stream_encoder_reset_fifo, .dp_blank = enc1_stream_encoder_dp_blank, .dp_unblank = diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h index a146a41f68e9ef786bbac83da31447d646043060..687d7e4bf7cadd2129b458aaabedaa4d70fbcaa7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_stream_encoder.h @@ -626,9 +626,6 @@ void enc1_stream_encoder_send_immediate_sdp_message( void enc1_stream_encoder_stop_dp_info_packets( struct stream_encoder *enc); -void enc1_stream_encoder_reset_fifo( - struct stream_encoder *enc); - void enc1_stream_encoder_dp_blank( struct dc_link *link, struct stream_encoder *enc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 2bc93df023ad26d54ac9e211ff749a1e2a5a3eec..2a72517e2b28173eb1edf713291db469579afcd9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1069,7 +1069,7 @@ static const struct dc_debug_options debug_defaults_drv = { .timing_trace = false, .clock_trace = true, .disable_pplib_clock_request = true, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c index 8a70f92795c2aa80fbc9f33b13f1c561f1ff2575..aab25ca8343abe4275fc7aa379cf47210a08592f 100644 ---
b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c @@ -593,8 +593,6 @@ static const struct stream_encoder_funcs dcn20_str_enc_funcs = { enc1_stream_encoder_send_immediate_sdp_message, .stop_dp_info_packets = enc1_stream_encoder_stop_dp_info_packets, - .reset_fifo = - enc1_stream_encoder_reset_fifo, .dp_blank = enc1_stream_encoder_dp_blank, .dp_unblank = diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c index 8daa12730bc13e36a9b416cf63686d0c302e7952..a04ca4a9839257c6b860e11ca25d5e9ce526c041 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dio_stream_encoder.c @@ -789,8 +789,6 @@ static const struct stream_encoder_funcs dcn30_str_enc_funcs = { enc3_stream_encoder_update_dp_info_packets, .stop_dp_info_packets = enc1_stream_encoder_stop_dp_info_packets, - .reset_fifo = - enc1_stream_encoder_reset_fifo, .dp_blank = enc1_stream_encoder_dp_blank, .dp_unblank = diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 602ec9a08549a6f7555e852b1895f023323d3108..8ca26383b5687ca383bc2c9c3fb6d72f72c90a26 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -1878,7 +1878,6 @@ noinline bool dcn30_internal_validate_bw( dc->res_pool->funcs->update_soc_for_wm_a(dc, context); pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate); - DC_FP_START(); if (!pipe_cnt) { out = true; goto validate_out; @@ -2104,7 +2103,6 @@ validate_fail: out = false; validate_out: - DC_FP_END(); return out; } @@ -2306,7 +2304,9 @@ bool dcn30_validate_bandwidth(struct dc *dc, BW_VAL_TRACE_COUNT(); + DC_FP_START(); out = dcn30_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, fast_validate); + DC_FP_END(); if (pipe_cnt == 0) goto validate_out; diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index c1c6e602b06cfbe4c818f4227fbeaacfb09c57d9..5d9637b0742923a47f30c0c181f516f29d021df1 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -686,7 +686,7 @@ static const struct dc_debug_options debug_defaults_drv = { .disable_clock_gate = true, .disable_pplib_clock_request = true, .disable_pplib_wm_range = true, - .pipe_split_policy = MPC_SPLIT_DYNAMIC, + .pipe_split_policy = MPC_SPLIT_AVOID, .force_single_disp_pipe_split = false, .disable_dcc = DCC_ENABLE, .vsr_support = true, @@ -1380,6 +1380,17 @@ static void set_wm_ranges( pp_smu->nv_funcs.set_wm_ranges(&pp_smu->nv_funcs.pp_smu, &ranges); } +static void dcn301_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel) +{ + DC_FP_START(); + dcn301_calculate_wm_and_dlg_fp(dc, context, pipes, pipe_cnt, vlevel); + DC_FP_END(); +} + static struct resource_funcs dcn301_res_pool_funcs = { .destroy = dcn301_destroy_resource_pool, .link_enc_create = dcn301_link_encoder_create, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c index 90c73a1cb98614d42dc9177ed4e5100fa0a2bfc2..5e3bcaf12cac4b55460981dd2e4b79586187b2e4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubbub.c @@ 
-138,8 +138,11 @@ static uint32_t convert_and_clamp( ret_val = wm_ns * refclk_mhz; ret_val /= 1000; - if (ret_val > clamp_value) + if (ret_val > clamp_value) { + /* clamping WMs is abnormal, unexpected and may lead to underflow*/ + ASSERT(0); ret_val = clamp_value; + } return ret_val; } @@ -159,7 +162,7 @@ static bool hubbub31_program_urgent_watermarks( if (safe_to_lower || watermarks->a.urgent_ns > hubbub2->watermarks.a.urgent_ns) { hubbub2->watermarks.a.urgent_ns = watermarks->a.urgent_ns; prog_wm_value = convert_and_clamp(watermarks->a.urgent_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0x3fff); REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, 0, DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_A, prog_wm_value); @@ -193,7 +196,7 @@ static bool hubbub31_program_urgent_watermarks( if (safe_to_lower || watermarks->a.urgent_latency_ns > hubbub2->watermarks.a.urgent_latency_ns) { hubbub2->watermarks.a.urgent_latency_ns = watermarks->a.urgent_latency_ns; prog_wm_value = convert_and_clamp(watermarks->a.urgent_latency_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0x3fff); REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, 0, DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_A, prog_wm_value); } else if (watermarks->a.urgent_latency_ns < hubbub2->watermarks.a.urgent_latency_ns) @@ -203,7 +206,7 @@ static bool hubbub31_program_urgent_watermarks( if (safe_to_lower || watermarks->b.urgent_ns > hubbub2->watermarks.b.urgent_ns) { hubbub2->watermarks.b.urgent_ns = watermarks->b.urgent_ns; prog_wm_value = convert_and_clamp(watermarks->b.urgent_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0x3fff); REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, 0, DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_B, prog_wm_value); @@ -237,7 +240,7 @@ static bool hubbub31_program_urgent_watermarks( if (safe_to_lower || watermarks->b.urgent_latency_ns > hubbub2->watermarks.b.urgent_latency_ns) { hubbub2->watermarks.b.urgent_latency_ns = watermarks->b.urgent_latency_ns; prog_wm_value = convert_and_clamp(watermarks->b.urgent_latency_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0x3fff); REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, 0, DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_B, prog_wm_value); } else if (watermarks->b.urgent_latency_ns < hubbub2->watermarks.b.urgent_latency_ns) @@ -247,7 +250,7 @@ static bool hubbub31_program_urgent_watermarks( if (safe_to_lower || watermarks->c.urgent_ns > hubbub2->watermarks.c.urgent_ns) { hubbub2->watermarks.c.urgent_ns = watermarks->c.urgent_ns; prog_wm_value = convert_and_clamp(watermarks->c.urgent_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0x3fff); REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, 0, DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_C, prog_wm_value); @@ -281,7 +284,7 @@ static bool hubbub31_program_urgent_watermarks( if (safe_to_lower || watermarks->c.urgent_latency_ns > hubbub2->watermarks.c.urgent_latency_ns) { hubbub2->watermarks.c.urgent_latency_ns = watermarks->c.urgent_latency_ns; prog_wm_value = convert_and_clamp(watermarks->c.urgent_latency_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0x3fff); REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C, 0, DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_C, prog_wm_value); } else if (watermarks->c.urgent_latency_ns < hubbub2->watermarks.c.urgent_latency_ns) @@ -291,7 +294,7 @@ static bool hubbub31_program_urgent_watermarks( if (safe_to_lower || watermarks->d.urgent_ns > hubbub2->watermarks.d.urgent_ns) { hubbub2->watermarks.d.urgent_ns = watermarks->d.urgent_ns; prog_wm_value = convert_and_clamp(watermarks->d.urgent_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0x3fff); 
REG_SET(DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, 0, DCHUBBUB_ARB_DATA_URGENCY_WATERMARK_D, prog_wm_value); @@ -325,7 +328,7 @@ static bool hubbub31_program_urgent_watermarks( if (safe_to_lower || watermarks->d.urgent_latency_ns > hubbub2->watermarks.d.urgent_latency_ns) { hubbub2->watermarks.d.urgent_latency_ns = watermarks->d.urgent_latency_ns; prog_wm_value = convert_and_clamp(watermarks->d.urgent_latency_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0x3fff); REG_SET(DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, 0, DCHUBBUB_ARB_REFCYC_PER_TRIP_TO_MEMORY_D, prog_wm_value); } else if (watermarks->d.urgent_latency_ns < hubbub2->watermarks.d.urgent_latency_ns) @@ -351,7 +354,7 @@ static bool hubbub31_program_stutter_watermarks( watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns; prog_wm_value = convert_and_clamp( watermarks->a.cstate_pstate.cstate_enter_plus_exit_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, 0, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_A calculated =%d\n" @@ -367,7 +370,7 @@ static bool hubbub31_program_stutter_watermarks( watermarks->a.cstate_pstate.cstate_exit_ns; prog_wm_value = convert_and_clamp( watermarks->a.cstate_pstate.cstate_exit_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, 0, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_A calculated =%d\n" @@ -383,7 +386,7 @@ static bool hubbub31_program_stutter_watermarks( watermarks->a.cstate_pstate.cstate_enter_plus_exit_z8_ns; prog_wm_value = convert_and_clamp( watermarks->a.cstate_pstate.cstate_enter_plus_exit_z8_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A, 0, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_A, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_A calculated =%d\n" @@ -399,7 +402,7 @@ static bool hubbub31_program_stutter_watermarks( watermarks->a.cstate_pstate.cstate_exit_z8_ns; prog_wm_value = convert_and_clamp( watermarks->a.cstate_pstate.cstate_exit_z8_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A, 0, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_A, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_A calculated =%d\n" @@ -416,7 +419,7 @@ static bool hubbub31_program_stutter_watermarks( watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns; prog_wm_value = convert_and_clamp( watermarks->b.cstate_pstate.cstate_enter_plus_exit_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, 0, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_B calculated =%d\n" @@ -432,7 +435,7 @@ static bool hubbub31_program_stutter_watermarks( watermarks->b.cstate_pstate.cstate_exit_ns; prog_wm_value = convert_and_clamp( watermarks->b.cstate_pstate.cstate_exit_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, 0, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_B calculated =%d\n" @@ -448,7 +451,7 @@ static bool hubbub31_program_stutter_watermarks( watermarks->b.cstate_pstate.cstate_enter_plus_exit_z8_ns; prog_wm_value = convert_and_clamp( watermarks->b.cstate_pstate.cstate_enter_plus_exit_z8_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B, 0, 
DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_B, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_B calculated =%d\n" @@ -464,7 +467,7 @@ static bool hubbub31_program_stutter_watermarks( watermarks->b.cstate_pstate.cstate_exit_z8_ns; prog_wm_value = convert_and_clamp( watermarks->b.cstate_pstate.cstate_exit_z8_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B, 0, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_B, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_B calculated =%d\n" @@ -481,7 +484,7 @@ static bool hubbub31_program_stutter_watermarks( watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns; prog_wm_value = convert_and_clamp( watermarks->c.cstate_pstate.cstate_enter_plus_exit_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, 0, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_C calculated =%d\n" @@ -497,7 +500,7 @@ static bool hubbub31_program_stutter_watermarks( watermarks->c.cstate_pstate.cstate_exit_ns; prog_wm_value = convert_and_clamp( watermarks->c.cstate_pstate.cstate_exit_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, 0, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_C calculated =%d\n" @@ -513,7 +516,7 @@ static bool hubbub31_program_stutter_watermarks( watermarks->c.cstate_pstate.cstate_enter_plus_exit_z8_ns; prog_wm_value = convert_and_clamp( watermarks->c.cstate_pstate.cstate_enter_plus_exit_z8_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C, 0, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_C calculated =%d\n" @@ -529,7 +532,7 @@ static bool hubbub31_program_stutter_watermarks( watermarks->c.cstate_pstate.cstate_exit_z8_ns; prog_wm_value = convert_and_clamp( watermarks->c.cstate_pstate.cstate_exit_z8_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C, 0, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_C calculated =%d\n" @@ -546,7 +549,7 @@ static bool hubbub31_program_stutter_watermarks( watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns; prog_wm_value = convert_and_clamp( watermarks->d.cstate_pstate.cstate_enter_plus_exit_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, 0, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_EXIT_WATERMARK_D calculated =%d\n" @@ -562,7 +565,7 @@ static bool hubbub31_program_stutter_watermarks( watermarks->d.cstate_pstate.cstate_exit_ns; prog_wm_value = convert_and_clamp( watermarks->d.cstate_pstate.cstate_exit_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, 0, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_D calculated =%d\n" @@ -578,7 +581,7 @@ static bool hubbub31_program_stutter_watermarks( watermarks->d.cstate_pstate.cstate_enter_plus_exit_z8_ns; prog_wm_value = convert_and_clamp( watermarks->d.cstate_pstate.cstate_enter_plus_exit_z8_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D, 0, DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_ENTER_WATERMARK_Z8_D 
calculated =%d\n" @@ -594,7 +597,7 @@ static bool hubbub31_program_stutter_watermarks( watermarks->d.cstate_pstate.cstate_exit_z8_ns; prog_wm_value = convert_and_clamp( watermarks->d.cstate_pstate.cstate_exit_z8_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, 0, DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("SR_EXIT_WATERMARK_Z8_D calculated =%d\n" @@ -625,7 +628,7 @@ static bool hubbub31_program_pstate_watermarks( watermarks->a.cstate_pstate.pstate_change_ns; prog_wm_value = convert_and_clamp( watermarks->a.cstate_pstate.pstate_change_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, 0, DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_A calculated =%d\n" @@ -642,7 +645,7 @@ static bool hubbub31_program_pstate_watermarks( watermarks->b.cstate_pstate.pstate_change_ns; prog_wm_value = convert_and_clamp( watermarks->b.cstate_pstate.pstate_change_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, 0, DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_B calculated =%d\n" @@ -659,7 +662,7 @@ static bool hubbub31_program_pstate_watermarks( watermarks->c.cstate_pstate.pstate_change_ns; prog_wm_value = convert_and_clamp( watermarks->c.cstate_pstate.pstate_change_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, 0, DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_C calculated =%d\n" @@ -676,7 +679,7 @@ static bool hubbub31_program_pstate_watermarks( watermarks->d.cstate_pstate.pstate_change_ns; prog_wm_value = convert_and_clamp( watermarks->d.cstate_pstate.pstate_change_ns, - refclk_mhz, 0x1fffff); + refclk_mhz, 0xffff); REG_SET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, 0, DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, prog_wm_value); DC_LOG_BANDWIDTH_CALCS("DRAM_CLK_CHANGE_WATERMARK_D calculated =%d\n" diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c index 42ed47e8133dfb5718d7fdcfdd7dab60d30bae8b..8d64187478e42dcf51aa549258e28e507d6c9d5d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c @@ -2260,6 +2260,9 @@ static bool dcn31_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + /* read VBIOS LTTPR caps */ { if (ctx->dc_bios->funcs->get_lttpr_caps) { diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c index 246071c72f6bf5ce2ceb1f29a5c2f6d075b26556..548cdef8a8ade2236445894e8743018acb893365 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20.c @@ -1576,8 +1576,6 @@ void dml20_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency - / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated 
print__dlg_sys_params_st(mode_lib, &dlg_sys_param); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c index 015e7f2c0b1602a7184f04c2ae670f6013f766ec..0fc9f3e3ffaefd2c3d56abec6734e701fab997c6 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/display_rq_dlg_calc_20v2.c @@ -1577,8 +1577,6 @@ void dml20v2_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency - / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated print__dlg_sys_params_st(mode_lib, &dlg_sys_param); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c index 8bc27de4c104162cd8fa342ba7153785110e7692..618f4b682ab1b1c090a36867b4aabf764af518c0 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_rq_dlg_calc_21.c @@ -1688,8 +1688,6 @@ void dml21_rq_dlg_get_dlg_reg( mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency - / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated print__dlg_sys_params_st(mode_lib, &dlg_sys_param); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c index aef8542700544ddefa91dacb1d89b8d2337c91fa..747167083dea6cc0f4bf424df30e5cac3a2ca743 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_rq_dlg_calc_30.c @@ -1858,8 +1858,6 @@ void dml30_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib, dlg_sys_param.total_flip_bytes = get_total_immediate_flip_bytes(mode_lib, e2e_pipe_param, num_pipes); - dlg_sys_param.t_srx_delay_us = mode_lib->ip.dcfclk_cstate_latency - / dlg_sys_param.deepsleep_dcfclk_mhz; // TODO: Deprecated print__dlg_sys_params_st(mode_lib, &dlg_sys_param); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c index 94c32832a0e7bc29f70cefa987465c4226ce6c42..0a7a33864973125d8bdcdc35c4c28a3d1accce98 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.c @@ -327,7 +327,7 @@ void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info) dcn3_01_soc.sr_exit_time_us = bb_info.dram_sr_exit_latency_100ns * 10; } -void dcn301_calculate_wm_and_dlg(struct dc *dc, +void dcn301_calculate_wm_and_dlg_fp(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int pipe_cnt, diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h index fc7065d17842260581131879198619b9b321cbec..774b0fdfc80beb41bc44281c666017fbbfd8810e 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn301/dcn301_fpu.h @@ -34,7 +34,7 @@ void dcn301_fpu_set_wm_ranges(int i, void dcn301_fpu_init_soc_bounding_box(struct bp_soc_bb_info bb_info); -void dcn301_calculate_wm_and_dlg(struct dc *dc, +void dcn301_calculate_wm_and_dlg_fp(struct dc *dc, struct dc_state *context, display_e2e_pipe_params_st *pipes, int 
pipe_cnt, diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h index d46a2733024ce5aeb78a606ee16bb4cea277f1f7..8f9f1d607f7cb9aa43a57ae9660f257cba05ff7c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h @@ -546,7 +546,6 @@ struct _vcs_dpi_display_dlg_sys_params_st { double t_sr_wm_us; double t_extra_us; double mem_trip_us; - double t_srx_delay_us; double deepsleep_dcfclk_mhz; double total_flip_bw; unsigned int total_flip_bytes; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c index 71ea503cb32fff48d0bb53a7f771ccf4335528b7..412e75eb47041abaef36ffcfb4d6c061ac46d15c 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c +++ b/drivers/gpu/drm/amd/display/dc/dml/display_rq_dlg_helpers.c @@ -141,9 +141,6 @@ void print__dlg_sys_params_st(struct display_mode_lib *mode_lib, const struct _v dml_print("DML_RQ_DLG_CALC: t_urg_wm_us = %3.2f\n", dlg_sys_param->t_urg_wm_us); dml_print("DML_RQ_DLG_CALC: t_sr_wm_us = %3.2f\n", dlg_sys_param->t_sr_wm_us); dml_print("DML_RQ_DLG_CALC: t_extra_us = %3.2f\n", dlg_sys_param->t_extra_us); - dml_print( - "DML_RQ_DLG_CALC: t_srx_delay_us = %3.2f\n", - dlg_sys_param->t_srx_delay_us); dml_print( "DML_RQ_DLG_CALC: deepsleep_dcfclk_mhz = %3.2f\n", dlg_sys_param->deepsleep_dcfclk_mhz); diff --git a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c index 59dc2c5b58dd71da980c391bacf0897146de3185..3df559c591f890c1be146b5a0379f8ff4272b193 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c +++ b/drivers/gpu/drm/amd/display/dc/dml/dml1_display_rq_dlg_calc.c @@ -1331,10 +1331,6 @@ void dml1_rq_dlg_get_dlg_params( if (dual_plane) DTRACE("DLG: %s: swath_height_c = %d", __func__, swath_height_c); - DTRACE( - "DLG: %s: t_srx_delay_us = %3.2f", - __func__, - (double) dlg_sys_param->t_srx_delay_us); DTRACE("DLG: %s: line_time_in_us = %3.2f", __func__, (double) line_time_in_us); DTRACE("DLG: %s: vupdate_offset = %d", __func__, vupdate_offset); DTRACE("DLG: %s: vupdate_width = %d", __func__, vupdate_width); diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h index 890280026e69406a68b2dc1953055e68a423a15f..943240e2809e95cff2d1a9d4e9a86d9ec57f929f 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h +++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h @@ -382,6 +382,7 @@ struct pipe_ctx { struct pll_settings pll_settings; uint8_t pipe_idx; + uint8_t pipe_idx_syncd; struct pipe_ctx *top_pipe; struct pipe_ctx *bottom_pipe; diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h index 073f8b667eff56ab3249d2214e94a9a88b25604c..c88e113b94d1204038b9680f17c35bb8bdfc6c19 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h @@ -164,10 +164,6 @@ struct stream_encoder_funcs { void (*stop_dp_info_packets)( struct stream_encoder *enc); - void (*reset_fifo)( - struct stream_encoder *enc - ); - void (*dp_blank)( struct dc_link *link, struct stream_encoder *enc); diff --git a/drivers/gpu/drm/amd/display/dc/inc/resource.h b/drivers/gpu/drm/amd/display/dc/inc/resource.h index 
4249bf306e09d4f1ff1eb5257d939f778519d872..dbfe6690ded8a7353b26960067780225724b2c81 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/resource.h +++ b/drivers/gpu/drm/amd/display/dc/inc/resource.h @@ -34,6 +34,10 @@ #define MEMORY_TYPE_HBM 2 +#define IS_PIPE_SYNCD_VALID(pipe) ((((pipe)->pipe_idx_syncd) & 0x80)?1:0) +#define GET_PIPE_SYNCD_FROM_PIPE(pipe) ((pipe)->pipe_idx_syncd & 0x7F) +#define SET_PIPE_SYNCD_TO_PIPE(pipe, pipe_syncd) ((pipe)->pipe_idx_syncd = (0x80 | pipe_syncd)) + enum dce_version resource_parse_asic_id( struct hw_asic_id asic_id); @@ -208,6 +212,13 @@ struct hpo_dp_link_encoder *resource_get_hpo_dp_link_enc_for_det_lt( const struct dc_link *link); #endif +void reset_syncd_pipes_from_disabled_pipes(struct dc *dc, + struct dc_state *context); + +void check_syncd_pipes_for_disabled_master_pipe(struct dc *dc, + struct dc_state *context, + uint8_t disabled_master_pipe_idx); + uint8_t resource_transmitter_to_phy_idx(const struct dc *dc, enum transmitter transmitter); #endif /* DRIVERS_GPU_DRM_AMD_DC_DEV_DC_INC_RESOURCE_H_ */ diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index e2cae97f4ff134c3c45a86914cc01082f682c232..48cc009d9bdf3fd3979974a668925be98c937e86 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -3462,8 +3462,7 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_power2_cap_min.dev_attr.attr || attr == &sensor_dev_attr_power2_cap.dev_attr.attr || attr == &sensor_dev_attr_power2_cap_default.dev_attr.attr || - attr == &sensor_dev_attr_power2_label.dev_attr.attr || - attr == &sensor_dev_attr_power1_label.dev_attr.attr)) + attr == &sensor_dev_attr_power2_label.dev_attr.attr)) return 0; return effective_mode; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 777f717c37aec14423bc6a2067df28d7e204c6f3..a4207293158c5d37ab5a6a78a06a2cbec1fb6d5e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -3696,14 +3696,14 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu, static int sienna_cichlid_enable_mgpu_fan_boost(struct smu_context *smu) { - struct smu_table_context *table_context = &smu->smu_table; - PPTable_t *smc_pptable = table_context->driver_pptable; + uint16_t *mgpu_fan_boost_limit_rpm; + GET_PPTABLE_MEMBER(MGpuFanBoostLimitRpm, &mgpu_fan_boost_limit_rpm); /* * Skip the MGpuFanBoost setting for those ASICs * which do not support it */ - if (!smc_pptable->MGpuFanBoostLimitRpm) + if (*mgpu_fan_boost_limit_rpm == 0) return 0; return smu_cmn_send_smc_msg_with_param(smu, diff --git a/drivers/gpu/drm/ast/ast_tables.h b/drivers/gpu/drm/ast/ast_tables.h index d9eb353a4bf09b6055c675064ec05695b22dcd0e..dbe1cc620f6e64940d3330eb8d7bf3adaea178ae 100644 --- a/drivers/gpu/drm/ast/ast_tables.h +++ b/drivers/gpu/drm/ast/ast_tables.h @@ -282,8 +282,6 @@ static const struct ast_vbios_enhtable res_1360x768[] = { }; static const struct ast_vbios_enhtable res_1600x900[] = { - {1800, 1600, 24, 80, 1000, 900, 1, 3, VCLK108, /* 60Hz */ - (SyncPP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 3, 0x3A }, {1760, 1600, 48, 32, 926, 900, 3, 5, VCLK97_75, /* 60Hz CVT RB */ (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo | AST2500PreCatchCRT), 60, 1, 0x3A }, diff --git a/drivers/gpu/drm/bridge/nwl-dsi.c b/drivers/gpu/drm/bridge/nwl-dsi.c index 
a7389a0facfb4e8f4a54607543c40a2e67aec5b0..af07eeb47ca02c5a69e81933d7e17c1ff6876aff 100644 --- a/drivers/gpu/drm/bridge/nwl-dsi.c +++ b/drivers/gpu/drm/bridge/nwl-dsi.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -196,12 +197,9 @@ static u32 ps2bc(struct nwl_dsi *dsi, unsigned long long ps) /* * ui2bc - UI time periods to byte clock cycles */ -static u32 ui2bc(struct nwl_dsi *dsi, unsigned long long ui) +static u32 ui2bc(unsigned int ui) { - u32 bpp = mipi_dsi_pixel_format_to_bpp(dsi->format); - - return DIV64_U64_ROUND_UP(ui * dsi->lanes, - dsi->mode.clock * 1000 * bpp); + return DIV_ROUND_UP(ui, BITS_PER_BYTE); } /* @@ -232,12 +230,12 @@ static int nwl_dsi_config_host(struct nwl_dsi *dsi) } /* values in byte clock cycles */ - cycles = ui2bc(dsi, cfg->clk_pre); + cycles = ui2bc(cfg->clk_pre); DRM_DEV_DEBUG_DRIVER(dsi->dev, "cfg_t_pre: 0x%x\n", cycles); nwl_dsi_write(dsi, NWL_DSI_CFG_T_PRE, cycles); cycles = ps2bc(dsi, cfg->lpx + cfg->clk_prepare + cfg->clk_zero); DRM_DEV_DEBUG_DRIVER(dsi->dev, "cfg_tx_gap (pre): 0x%x\n", cycles); - cycles += ui2bc(dsi, cfg->clk_pre); + cycles += ui2bc(cfg->clk_pre); DRM_DEV_DEBUG_DRIVER(dsi->dev, "cfg_t_post: 0x%x\n", cycles); nwl_dsi_write(dsi, NWL_DSI_CFG_T_POST, cycles); cycles = ps2bc(dsi, cfg->hs_exit); diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 21174efd91becebe30c2d9cf12d8dc7c68404434..88cd992df35602c979fb72b0f95b47acadd2a854 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1327,8 +1327,10 @@ int drm_atomic_check_only(struct drm_atomic_state *state) drm_dbg_atomic(dev, "checking %p\n", state); - for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) - requested_crtc |= drm_crtc_mask(crtc); + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + if (new_crtc_state->enable) + requested_crtc |= drm_crtc_mask(crtc); + } for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { ret = drm_atomic_plane_check(old_plane_state, new_plane_state); @@ -1377,8 +1379,10 @@ int drm_atomic_check_only(struct drm_atomic_state *state) } } - for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) - affected_crtc |= drm_crtc_mask(crtc); + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { + if (new_crtc_state->enable) + affected_crtc |= drm_crtc_mask(crtc); + } /* * For commits that allow modesets drivers can add other CRTCs to the diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index f3d79eda94bb0cb744a52c43c8c725f1c0b1716b..8b3822142fed8d7d14e28044c2cb9b9eb2fc6eee 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -5511,6 +5511,7 @@ int drm_dp_mst_topology_mgr_init(struct drm_dp_mst_topology_mgr *mgr, mutex_init(&mgr->probe_lock); #if IS_ENABLED(CONFIG_DRM_DEBUG_DP_MST_TOPOLOGY_REFS) mutex_init(&mgr->topology_ref_history_lock); + stack_depot_init(); #endif INIT_LIST_HEAD(&mgr->tx_msg_downq); INIT_LIST_HEAD(&mgr->destroy_port_list); diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 7d1c578388d33259c572a93e3cd3a9b592880b60..8257f9d4f6190d828f99cd119495ed82b707b456 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -980,6 +980,10 @@ void drm_mm_init(struct drm_mm *mm, u64 start, u64 size) add_hole(&mm->head_node); mm->scan_active = 0; + +#ifdef CONFIG_DRM_DEBUG_MM + stack_depot_init(); +#endif } EXPORT_SYMBOL(drm_mm_init); diff --git a/drivers/gpu/drm/drm_modeset_lock.c 
b/drivers/gpu/drm/drm_modeset_lock.c index c97323365675c9e66b897a0ac7fbe51bf71ca69e..918065982db4614d7d3b62f9356804053e5ae60c 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -107,6 +107,11 @@ static void __drm_stack_depot_print(depot_stack_handle_t stack_depot) kfree(buf); } + +static void __drm_stack_depot_init(void) +{ + stack_depot_init(); +} #else /* CONFIG_DRM_DEBUG_MODESET_LOCK */ static depot_stack_handle_t __drm_stack_depot_save(void) { @@ -115,6 +120,9 @@ static depot_stack_handle_t __drm_stack_depot_save(void) static void __drm_stack_depot_print(depot_stack_handle_t stack_depot) { } +static void __drm_stack_depot_init(void) +{ +} #endif /* CONFIG_DRM_DEBUG_MODESET_LOCK */ /** @@ -359,6 +367,7 @@ void drm_modeset_lock_init(struct drm_modeset_lock *lock) { ww_mutex_init(&lock->mutex, &crtc_ww_class); INIT_LIST_HEAD(&lock->head); + __drm_stack_depot_init(); } EXPORT_SYMBOL(drm_modeset_lock_init); diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 042bb80383c932824e331e4ad1e8a2c911dbfac1..b910978d3e48090aedb605a546c096d749736c55 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -115,6 +115,12 @@ static const struct drm_dmi_panel_orientation_data lcd1280x1920_rightside_up = { .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; +static const struct drm_dmi_panel_orientation_data lcd1600x2560_leftside_up = { + .width = 1600, + .height = 2560, + .orientation = DRM_MODE_PANEL_ORIENTATION_LEFT_UP, +}; + static const struct dmi_system_id orientation_data[] = { { /* Acer One 10 (S1003) */ .matches = { @@ -275,6 +281,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Default string"), }, .driver_data = (void *)&onegx1_pro, + }, { /* OneXPlayer */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ONE-NETBOOK TECHNOLOGY CO., LTD."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ONE XPLAYER"), + }, + .driver_data = (void *)&lcd1600x2560_leftside_up, }, { /* Samsung GalaxyBook 10.6 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), diff --git a/drivers/gpu/drm/drm_privacy_screen.c b/drivers/gpu/drm/drm_privacy_screen.c index beaf99e9120a609b01166c566d8381e24739bbf2..b688841c18e416e9a234ecd563812a5a196752e0 100644 --- a/drivers/gpu/drm/drm_privacy_screen.c +++ b/drivers/gpu/drm/drm_privacy_screen.c @@ -269,7 +269,7 @@ EXPORT_SYMBOL(drm_privacy_screen_get_state); * * The notifier is called with no locks held. The new hw_state and sw_state * can be retrieved using the drm_privacy_screen_get_state() function. - * A pointer to the drm_privacy_screen's struct is passed as the void *data + * A pointer to the drm_privacy_screen's struct is passed as the ``void *data`` * argument of the notifier_block's notifier_call. 
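The drm_modeset_lock.c change above pairs a real __drm_stack_depot_init() under CONFIG_DRM_DEBUG_MODESET_LOCK with an empty stub in the #else branch, so drm_modeset_lock_init() can call it unconditionally. A minimal standalone sketch of that compile-time stub pattern, using a made-up FEATURE_DEBUG macro rather than the kernel's Kconfig machinery:

#include <stdio.h>

#ifdef FEATURE_DEBUG
static void feature_debug_init(void)
{
        printf("debug support initialized\n");
}
#else
/* No-op stub: callers need no #ifdef at every call site. */
static void feature_debug_init(void)
{
}
#endif

int main(void)
{
        feature_debug_init();   /* unconditional call, free when disabled */
        return 0;
}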
* * The notifier will NOT be called when changes are made through diff --git a/drivers/gpu/drm/drm_privacy_screen_x86.c b/drivers/gpu/drm/drm_privacy_screen_x86.c index a2cafb294ca6a1990410e8ee8014fdca55fc4677..e7aa74ad0b241f113af2331352f14e89c814d0f3 100644 --- a/drivers/gpu/drm/drm_privacy_screen_x86.c +++ b/drivers/gpu/drm/drm_privacy_screen_x86.c @@ -33,6 +33,9 @@ static bool __init detect_thinkpad_privacy_screen(void) unsigned long long output; acpi_status status; + if (acpi_disabled) + return false; + /* Get embedded-controller handle */ status = acpi_get_devices("PNP0C09", acpi_set_handle, NULL, &ec_handle); if (ACPI_FAILURE(status) || !ec_handle) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index b03c20c14ca1ebdcf44b3298b74e2f7ca6f303ef..a17313282e8be4b9a2a5403aca97e352edaf67ad 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -469,8 +469,8 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, return -EINVAL; } - if (args->stream_size > SZ_64K || args->nr_relocs > SZ_64K || - args->nr_bos > SZ_64K || args->nr_pmrs > 128) { + if (args->stream_size > SZ_128K || args->nr_relocs > SZ_128K || + args->nr_bos > SZ_128K || args->nr_pmrs > 128) { DRM_ERROR("submit arguments out of size limits\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index ba5fd012a40a90f73df475076b14f2974fcab8e9..37018bc55810d1385c455ba0b274f79162b33faa 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1047,7 +1047,7 @@ pm_put: void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu) { - unsigned int i = 0; + unsigned int i; dev_err(gpu->dev, "recover hung GPU!\n"); @@ -1060,7 +1060,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu) /* complete all events, the GPU won't do it after the reset */ spin_lock(&gpu->event_spinlock); - for_each_set_bit_from(i, gpu->event_bitmap, ETNA_NR_EVENTS) + for_each_set_bit(i, gpu->event_bitmap, ETNA_NR_EVENTS) complete(&gpu->event_free); bitmap_zero(gpu->event_bitmap, ETNA_NR_EVENTS); spin_unlock(&gpu->event_spinlock); diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index bf7ce684dd8e4b098029667c694ce924b73e7cf8..bb4a85445fc6955f88796d4b7d1b57e961d3dd4c 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -10673,6 +10673,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev, vlv_wm_sanitize(dev_priv); } else if (DISPLAY_VER(dev_priv) >= 9) { skl_wm_get_hw_state(dev_priv); + skl_wm_sanitize(dev_priv); } else if (HAS_PCH_SPLIT(dev_priv)) { ilk_wm_get_hw_state(dev_priv); } diff --git a/drivers/gpu/drm/i915/display/intel_drrs.c b/drivers/gpu/drm/i915/display/intel_drrs.c index c1439fcb5a9593f5e8009c7330ace21169afb858..3ff149df4a77a7ba3035054769c63ffaf96c39ed 100644 --- a/drivers/gpu/drm/i915/display/intel_drrs.c +++ b/drivers/gpu/drm/i915/display/intel_drrs.c @@ -405,6 +405,7 @@ intel_drrs_init(struct intel_connector *connector, struct drm_display_mode *fixed_mode) { struct drm_i915_private *dev_priv = to_i915(connector->base.dev); + struct intel_encoder *encoder = connector->encoder; struct drm_display_mode *downclock_mode = NULL; INIT_DELAYED_WORK(&dev_priv->drrs.work, intel_drrs_downclock_work); @@ -416,6 +417,13 @@ intel_drrs_init(struct intel_connector *connector, return NULL; } + if ((DISPLAY_VER(dev_priv) < 8 
&& !HAS_GMCH(dev_priv)) && + encoder->port != PORT_A) { + drm_dbg_kms(&dev_priv->drm, + "DRRS only supported on eDP port A\n"); + return NULL; + } + if (dev_priv->vbt.drrs_type != SEAMLESS_DRRS_SUPPORT) { drm_dbg_kms(&dev_priv->drm, "VBT doesn't support DRRS\n"); return NULL; } diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 1a376e9a1ff3caa5cb8ae512cf4063f15ba766d4..d610e48cab94ff243a92e90fa2e5ab6a0d757ff9 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -959,6 +959,9 @@ static int check_overlay_dst(struct intel_overlay *overlay, const struct intel_crtc_state *pipe_config = overlay->crtc->config; + if (rec->dst_height == 0 || rec->dst_width == 0) + return -EINVAL; + if (rec->dst_x < pipe_config->pipe_src_w && rec->dst_x + rec->dst_width <= pipe_config->pipe_src_w && rec->dst_y < pipe_config->pipe_src_h && diff --git a/drivers/gpu/drm/i915/display/intel_tc.c b/drivers/gpu/drm/i915/display/intel_tc.c index 40faa18947c99f6d10b28d2a923ae09ac7ac66f6..dbd7d0d83a141ca33073cfeca89194f56e8d5d1e 100644 --- a/drivers/gpu/drm/i915/display/intel_tc.c +++ b/drivers/gpu/drm/i915/display/intel_tc.c @@ -345,10 +345,11 @@ static bool icl_tc_phy_status_complete(struct intel_digital_port *dig_port) static bool adl_tc_phy_status_complete(struct intel_digital_port *dig_port) { struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev); + enum tc_port tc_port = intel_port_to_tc(i915, dig_port->base.port); struct intel_uncore *uncore = &i915->uncore; u32 val; - val = intel_uncore_read(uncore, TCSS_DDI_STATUS(dig_port->tc_phy_fia_idx)); + val = intel_uncore_read(uncore, TCSS_DDI_STATUS(tc_port)); if (val == 0xffffffff) { drm_dbg_kms(&i915->drm, "Port %s: PHY in TCCOLD, assuming not complete\n", diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 3a5b247be738c820ef5e1c80840583f9b3cedf03..1736efa43339b4f6427097b8669bdc92c73c10c8 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2505,9 +2505,14 @@ static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce, timeout) < 0) { i915_request_put(rq); - tl = intel_context_timeline_lock(ce); + /* + * Error path: cannot use intel_context_timeline_lock as + * that is user interruptible and this cleanup step + * must be done. + */ + mutex_lock(&ce->timeline->mutex); intel_context_exit(ce); - intel_context_timeline_unlock(tl); + mutex_unlock(&ce->timeline->mutex); if (nonblock) return -EWOULDBLOCK; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index 4b4829eb16c2d3642a536e90c88961e1a1d04361..0dd107dcecc29075172ed2b92373598e518406db 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -311,6 +311,7 @@ struct drm_i915_gem_object { #define I915_BO_READONLY BIT(6) #define I915_TILING_QUIRK_BIT 7 /* unknown swizzling; do not release!
*/ #define I915_BO_PROTECTED BIT(8) +#define I915_BO_WAS_BOUND_BIT 9 /** * @mem_flags - Mutable placement-related flags * diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index 9f429ed6e78a13c4eeb8e35e536392f91eed57c6..a50f884973bc1a847b41de98d040b14c48f20cdf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -10,6 +10,8 @@ #include "i915_gem_lmem.h" #include "i915_gem_mman.h" +#include "gt/intel_gt.h" + void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, unsigned int sg_page_sizes) @@ -221,6 +223,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) __i915_gem_object_reset_page_iter(obj); obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + intel_wakeref_t wakeref; + + with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref) + intel_gt_invalidate_tlbs(to_gt(i915)); + } + return pages; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c index ee9612a3ee5eba78355c8c789ecf7f811d8bd404..e130c820ae4efeb68319fa4acbba29c96ebacdbc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c @@ -427,11 +427,17 @@ __i915_ttm_move(struct ttm_buffer_object *bo, if (!IS_ERR(fence)) goto out; - } else if (move_deps) { - int err = i915_deps_sync(move_deps, ctx); + } else { + int err = PTR_ERR(fence); + + if (err == -EINTR || err == -ERESTARTSYS || err == -EAGAIN) + return fence; - if (err) - return ERR_PTR(err); + if (move_deps) { + err = i915_deps_sync(move_deps, ctx); + if (err) + return ERR_PTR(err); + } } /* Error intercept failed or no accelerated migration to start with */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index f98f0fb21efbe69b3095402c6655e05b7e378879..35d0fcd3a86cda768cb70e839f1bf6e0498dc791 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -29,6 +29,8 @@ void __intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) { spin_lock_init(&gt->irq_lock); + mutex_init(&gt->tlb_invalidate_lock); + INIT_LIST_HEAD(&gt->closed_vma); spin_lock_init(&gt->closed_lock); @@ -912,3 +914,109 @@ void intel_gt_info_print(const struct intel_gt_info *info, intel_sseu_dump(&info->sseu, p); } + +struct reg_and_bit { + i915_reg_t reg; + u32 bit; +}; + +static struct reg_and_bit +get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, + const i915_reg_t *regs, const unsigned int num) +{ + const unsigned int class = engine->class; + struct reg_and_bit rb = { }; + + if (drm_WARN_ON_ONCE(&engine->i915->drm, + class >= num || !regs[class].reg)) + return rb; + + rb.reg = regs[class]; + if (gen8 && class == VIDEO_DECODE_CLASS) + rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */ + else + rb.bit = engine->instance; + + rb.bit = BIT(rb.bit); + + return rb; +} + +void intel_gt_invalidate_tlbs(struct intel_gt *gt) +{ + static const i915_reg_t gen8_regs[] = { + [RENDER_CLASS] = GEN8_RTCR, + [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ + [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, + [COPY_ENGINE_CLASS] = GEN8_BTCR, + }; + static const i915_reg_t gen12_regs[] = { + [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR, + [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR, + [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR, + [COPY_ENGINE_CLASS] =
GEN12_BLT_TLB_INV_CR, + }; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + enum intel_engine_id id; + const i915_reg_t *regs; + unsigned int num = 0; + + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + return; + + if (GRAPHICS_VER(i915) == 12) { + regs = gen12_regs; + num = ARRAY_SIZE(gen12_regs); + } else if (GRAPHICS_VER(i915) >= 8 && GRAPHICS_VER(i915) <= 11) { + regs = gen8_regs; + num = ARRAY_SIZE(gen8_regs); + } else if (GRAPHICS_VER(i915) < 8) { + return; + } + + if (drm_WARN_ONCE(&i915->drm, !num, + "Platform does not implement TLB invalidation!")) + return; + + GEM_TRACE("\n"); + + assert_rpm_wakelock_held(&i915->runtime_pm); + + mutex_lock(&gt->tlb_invalidate_lock); + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + for_each_engine(engine, gt, id) { + /* + * HW architecture suggests a typical invalidation time of 40us, + * with pessimistic cases up to 100us and a recommendation to + * cap at 1ms. We go a bit higher just in case. + */ + const unsigned int timeout_us = 100; + const unsigned int timeout_ms = 4; + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + if (__intel_wait_for_register_fw(uncore, + rb.reg, rb.bit, 0, + timeout_us, timeout_ms, + NULL)) + drm_err_ratelimited(&gt->i915->drm, + "%s TLB invalidation did not complete in %ums!\n", + engine->name, timeout_ms); + } + + /* + * Use delayed put since a) we mostly expect a flurry of TLB + * invalidations so it is good to avoid paying the forcewake cost and + * b) it works around a bug in Icelake which cannot cope with too rapid + * transitions. + */ + intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); + mutex_unlock(&gt->tlb_invalidate_lock); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 3ace129eb2af49970fb124a8fe0eb04200fbe82d..a913fb6ffec39941d585b7dc325f3177085e95f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -91,4 +91,6 @@ void intel_gt_info_print(const struct intel_gt_info *info, void intel_gt_watchdog_work(struct work_struct *work); +void intel_gt_invalidate_tlbs(struct intel_gt *gt); + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 14216cc471b1b403d8b92d4f3b37b132c46d53c1..f206877964908ba82089a48010b886e394936585 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -73,6 +73,8 @@ struct intel_gt { struct intel_uc uc; + struct mutex tlb_invalidate_lock; + struct i915_wa_list wa_list; struct intel_gt_timelines { diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index f9240d4baa696672db678f38966fa5f31427787c..3aabe164c3291a47fb2dc3e38ce11828c03d7829 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -206,6 +206,11 @@ struct intel_guc { * context usage for overflows.
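intel_gt_invalidate_tlbs() above writes one bit per engine and then polls the same register until the hardware clears it, bounded by a timeout, before dropping forcewake. A standalone model of that write-then-poll-until-clear handshake; the volatile variable stands in for an MMIO register and the poll budget is arbitrary, not the i915 API:

#include <stdbool.h>
#include <stdint.h>

/* Stand-in for an MMIO register; real hardware clears the bit itself. */
static volatile uint32_t tlb_inv_reg;

/* Trigger an invalidation and wait (bounded) for the acknowledgement. */
static bool invalidate_and_wait(uint32_t bit, unsigned int max_polls)
{
        tlb_inv_reg |= bit;                     /* kick the invalidation */

        while (max_polls--) {
                if (!(tlb_inv_reg & bit))       /* HW acks by clearing the bit */
                        return true;
                /* a real driver would cpu_relax()/udelay() between polls */
        }

        return false;                           /* timed out */
}

int main(void)
{
        /* nothing clears the bit here, so this exercises the timeout path */
        return invalidate_and_wait(1u << 0, 100) ? 0 : 1;
}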
*/ struct delayed_work work; + + /** + * @shift: Right shift value for the gpm timestamp + */ + u32 shift; } timestamp; #ifdef CONFIG_DRM_I915_SELFTEST diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index e7517206af82187c9974beb54f0741523294c281..154ad726e266a75def45474ffd1fbde652d0cb6d 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -1113,6 +1113,19 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) if (new_start == lower_32_bits(*prev_start)) return; + /* + * When gt is unparked, we update the gt timestamp and start the ping + * worker that updates the gt_stamp every POLL_TIME_CLKS. As long as gt + * is unparked, all switched-in contexts will have a start time that is + * within +/- POLL_TIME_CLKS of the most recent gt_stamp. + * + * If neither gt_stamp nor new_start has rolled over, then the + * gt_stamp_hi does not need to be adjusted. However, if one of them has + * rolled over, we need to adjust gt_stamp_hi accordingly. + * + * The below conditions address the cases of new_start rollover and + * gt_stamp_last rollover respectively. + */ if (new_start < gt_stamp_last && (new_start - gt_stamp_last) <= POLL_TIME_CLKS) gt_stamp_hi++; @@ -1124,17 +1137,45 @@ __extend_last_switch(struct intel_guc *guc, u64 *prev_start, u32 new_start) *prev_start = ((u64)gt_stamp_hi << 32) | new_start; } -static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) +/* + * GuC updates shared memory and KMD reads it. Since this is not synchronized, + * we run into a race where the value read is inconsistent. Sometimes the + * inconsistency is in reading the most significant bytes of the last_in value + * when this race occurs. Two types of cases are seen: the upper 8 bits are + * zero, or the upper 24 bits are zero. Since these are non-zero values, it is + * non-trivial to determine validity of these values. Instead we read the values + * multiple times until they are consistent. In test runs, 3 attempts result in + * consistent values. The upper bound is set to 6 attempts and may need to be + * tuned as per any new occurrences.
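The comment above closes by describing the mitigation: re-read the GuC-shared record until two consecutive snapshots agree. A standalone sketch of that consistent-snapshot loop; the three-field record and the bound of 6 attempts mirror the hunk, while the volatile qualifiers stand in for READ_ONCE():

#include <stdint.h>

struct usage_record {
        volatile uint32_t last_switch_in;
        volatile uint32_t context_id;
        volatile uint32_t total_runtime;
};

/* Re-read until two consecutive snapshots match, up to 6 attempts. */
static void read_consistent(const struct usage_record *rec,
                            uint32_t *in, uint32_t *id, uint32_t *total)
{
        int i = 0;

        do {
                *in    = rec->last_switch_in;
                *id    = rec->context_id;
                *total = rec->total_runtime;

                if (rec->last_switch_in == *in &&
                    rec->context_id == *id &&
                    rec->total_runtime == *total)
                        break;          /* both snapshots agree */
        } while (++i < 6);
}

int main(void)
{
        struct usage_record rec = { 1, 2, 3 };
        uint32_t in, id, total;

        read_consistent(&rec, &in, &id, &total);
        return !(in == 1 && id == 2 && total == 3);
}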
+ */ +static void __get_engine_usage_record(struct intel_engine_cs *engine, + u32 *last_in, u32 *id, u32 *total) { struct guc_engine_usage_record *rec = intel_guc_engine_usage(engine); + int i = 0; + + do { + *last_in = READ_ONCE(rec->last_switch_in_stamp); + *id = READ_ONCE(rec->current_context_index); + *total = READ_ONCE(rec->total_runtime); + + if (READ_ONCE(rec->last_switch_in_stamp) == *last_in && + READ_ONCE(rec->current_context_index) == *id && + READ_ONCE(rec->total_runtime) == *total) + break; + } while (++i < 6); +} + +static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) +{ struct intel_engine_guc_stats *stats = &engine->stats.guc; struct intel_guc *guc = &engine->gt->uc.guc; - u32 last_switch = rec->last_switch_in_stamp; - u32 ctx_id = rec->current_context_index; - u32 total = rec->total_runtime; + u32 last_switch, ctx_id, total; lockdep_assert_held(&guc->timestamp.lock); + __get_engine_usage_record(engine, &last_switch, &ctx_id, &total); + stats->running = ctx_id != ~0U && last_switch; if (stats->running) __extend_last_switch(guc, &stats->start_gt_clk, last_switch); @@ -1149,23 +1190,51 @@ static void guc_update_engine_gt_clks(struct intel_engine_cs *engine) } } -static void guc_update_pm_timestamp(struct intel_guc *guc, - struct intel_engine_cs *engine, - ktime_t *now) +static u32 gpm_timestamp_shift(struct intel_gt *gt) +{ + intel_wakeref_t wakeref; + u32 reg, shift; + + with_intel_runtime_pm(gt->uncore->rpm, wakeref) + reg = intel_uncore_read(gt->uncore, RPM_CONFIG0); + + shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT; + + return 3 - shift; +} + +static u64 gpm_timestamp(struct intel_gt *gt) +{ + u32 lo, hi, old_hi, loop = 0; + + hi = intel_uncore_read(gt->uncore, MISC_STATUS1); + do { + lo = intel_uncore_read(gt->uncore, MISC_STATUS0); + old_hi = hi; + hi = intel_uncore_read(gt->uncore, MISC_STATUS1); + } while (old_hi != hi && loop++ < 2); + + return ((u64)hi << 32) | lo; +} + +static void guc_update_pm_timestamp(struct intel_guc *guc, ktime_t *now) { - u32 gt_stamp_now, gt_stamp_hi; + struct intel_gt *gt = guc_to_gt(guc); + u32 gt_stamp_lo, gt_stamp_hi; + u64 gpm_ts; lockdep_assert_held(&guc->timestamp.lock); gt_stamp_hi = upper_32_bits(guc->timestamp.gt_stamp); - gt_stamp_now = intel_uncore_read(engine->uncore, - RING_TIMESTAMP(engine->mmio_base)); + gpm_ts = gpm_timestamp(gt) >> guc->timestamp.shift; + gt_stamp_lo = lower_32_bits(gpm_ts); *now = ktime_get(); - if (gt_stamp_now < lower_32_bits(guc->timestamp.gt_stamp)) + if (gt_stamp_lo < lower_32_bits(guc->timestamp.gt_stamp)) gt_stamp_hi++; - guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_now; + guc->timestamp.gt_stamp = ((u64)gt_stamp_hi << 32) | gt_stamp_lo; } /* @@ -1208,8 +1277,12 @@ static ktime_t guc_engine_busyness(struct intel_engine_cs *engine, ktime_t *now) if (!in_reset && intel_gt_pm_get_if_awake(gt)) { stats_saved = *stats; gt_stamp_saved = guc->timestamp.gt_stamp; + /* + * Update gt_clks, then gt timestamp to simplify the 'gt_stamp - + * start_gt_clk' calculation below for active engines. 
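gpm_timestamp() in the hunk above assembles a 64-bit value from two 32-bit reads (MISC_STATUS1 for the high half, MISC_STATUS0 for the low), re-reading the high half so a carry landing between the two reads cannot produce a torn value. A standalone sketch of that hi/lo/hi pattern; the volatile pair stands in for the MMIO registers:

#include <stdint.h>

static volatile uint32_t counter_hi, counter_lo;

/* Read hi, then lo, then hi again; retry if a carry moved hi. */
static uint64_t read_counter64(void)
{
        uint32_t lo, hi, old_hi;
        int loop = 0;

        hi = counter_hi;
        do {
                lo = counter_lo;
                old_hi = hi;
                hi = counter_hi;
        } while (old_hi != hi && loop++ < 2);

        return ((uint64_t)hi << 32) | lo;
}

int main(void)
{
        counter_hi = 0x1;
        counter_lo = 0xffffffff;
        return read_counter64() == 0x1ffffffffULL ? 0 : 1;
}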
+ */ guc_update_engine_gt_clks(engine); - guc_update_pm_timestamp(guc, engine, now); + guc_update_pm_timestamp(guc, now); intel_gt_pm_put_async(gt); if (i915_reset_count(gpu_error) != reset_count) { *stats = stats_saved; @@ -1241,8 +1314,8 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc) spin_lock_irqsave(&guc->timestamp.lock, flags); + guc_update_pm_timestamp(guc, &unused); for_each_engine(engine, gt, id) { - guc_update_pm_timestamp(guc, engine, &unused); guc_update_engine_gt_clks(engine); engine->stats.guc.prev_total = 0; } @@ -1259,10 +1332,11 @@ static void __update_guc_busyness_stats(struct intel_guc *guc) ktime_t unused; spin_lock_irqsave(&guc->timestamp.lock, flags); - for_each_engine(engine, gt, id) { - guc_update_pm_timestamp(guc, engine, &unused); + + guc_update_pm_timestamp(guc, &unused); + for_each_engine(engine, gt, id) guc_update_engine_gt_clks(engine); - } + spin_unlock_irqrestore(&guc->timestamp.lock, flags); } @@ -1335,10 +1409,15 @@ void intel_guc_busyness_park(struct intel_gt *gt) void intel_guc_busyness_unpark(struct intel_gt *gt) { struct intel_guc *guc = &gt->uc.guc; + unsigned long flags; + ktime_t unused; if (!guc_submission_initialized(guc)) return; + spin_lock_irqsave(&guc->timestamp.lock, flags); + guc_update_pm_timestamp(guc, &unused); + spin_unlock_irqrestore(&guc->timestamp.lock, flags); mod_delayed_work(system_highpri_wq, &guc->timestamp.work, guc->timestamp.ping_delay); } @@ -1783,6 +1862,7 @@ int intel_guc_submission_init(struct intel_guc *guc) spin_lock_init(&guc->timestamp.lock); INIT_DELAYED_WORK(&guc->timestamp.work, guc_timestamp_ping); guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ; + guc->timestamp.shift = gpm_timestamp_shift(gt); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5ae812d60abe5c187cb69b84862f7f86b2887872..0633888a411eed3c4dfc9fe07ae123016ca6585d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1522,7 +1522,7 @@ capture_engine(struct intel_engine_cs *engine, struct i915_request *rq = NULL; unsigned long flags; - ee = intel_engine_coredump_alloc(engine, GFP_KERNEL); + ee = intel_engine_coredump_alloc(engine, ALLOW_FAIL); if (!ee) return NULL; diff --git a/drivers/gpu/drm/i915/i915_mm.h b/drivers/gpu/drm/i915/i915_mm.h index 76f1d53bdf34c2ad72cc267fc73d26aa089f7bb9..3ad22bbe80ebfcbc7239b19ea51ac9ce7ea706da 100644 --- a/drivers/gpu/drm/i915/i915_mm.h +++ b/drivers/gpu/drm/i915/i915_mm.h @@ -6,6 +6,7 @@ #ifndef __I915_MM_H__ #define __I915_MM_H__ +#include #include struct vm_area_struct; diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 170bba913c30c62c572945471f426f6e8990f0de..e27f3b7cf094e73ab51657f97481241751670c12 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -4273,26 +4273,6 @@ static struct ctl_table oa_table[] = { {} }; -static struct ctl_table i915_root[] = { - { - .procname = "i915", - .maxlen = 0, - .mode = 0555, - .child = oa_table, - }, - {} -}; - -static struct ctl_table dev_root[] = { - { - .procname = "dev", - .maxlen = 0, - .mode = 0555, - .child = i915_root, - }, - {} -}; - static void oa_init_supported_formats(struct i915_perf *perf) { struct drm_i915_private *i915 = perf->i915; @@ -4488,7 +4468,7 @@ static int destroy_config(int id, void *p, void *data) int i915_perf_sysctl_register(void) { - sysctl_header = register_sysctl_table(dev_root); + sysctl_header = register_sysctl("dev/i915",
oa_table); return 0; } diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 971d601fe75123b2b65f3529ef6c4a5ee6a214dc..c2bb33febb68e45354c608f9c7fae2b204530d3e 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2684,7 +2684,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define RING_WAIT (1 << 11) /* gen3+, PRBx_CTL */ #define RING_WAIT_SEMAPHORE (1 << 10) /* gen6+ */ -#define GUCPMTIMESTAMP _MMIO(0xC3E8) +#define MISC_STATUS0 _MMIO(0xA500) +#define MISC_STATUS1 _MMIO(0xA504) /* There are 16 64-bit CS General Purpose Registers per-engine on Gen8+ */ #define GEN8_RING_CS_GPR(base, n) _MMIO((base) + 0x600 + (n) * 8) @@ -2721,6 +2722,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1 << 28) #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1 << 24) +#define GEN8_RTCR _MMIO(0x4260) +#define GEN8_M1TCR _MMIO(0x4264) +#define GEN8_M2TCR _MMIO(0x4268) +#define GEN8_BTCR _MMIO(0x426c) +#define GEN8_VTCR _MMIO(0x4270) + #if 0 #define PRB0_TAIL _MMIO(0x2030) #define PRB0_HEAD _MMIO(0x2034) @@ -2819,6 +2826,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define FAULT_VA_HIGH_BITS (0xf << 0) #define FAULT_GTT_SEL (1 << 4) +#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8) +#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc) +#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0) +#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4) + #define GEN12_AUX_ERR_DBG _MMIO(0x43f4) #define FPGA_DBG _MMIO(0x42300) diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 29a858c53bdd0f32493329a10bc581be9839a5d7..c0d6d5526abe614d1caa0503a9c2221be290154f 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -457,6 +457,9 @@ int i915_vma_bind(struct i915_vma *vma, vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags); } + if (vma->obj) + set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); + atomic_or(bind_flags, &vma->flags); return 0; } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 434b1f8b7fe3837d383d298733bc9281335c0e07..3edba7fd0c4907e94a5f75393ce3dbac2eb3d742 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4717,6 +4717,10 @@ static const struct dbuf_slice_conf_entry dg2_allowed_dbufs[] = { }; static const struct dbuf_slice_conf_entry adlp_allowed_dbufs[] = { + /* + * Keep the join_mbus cases first so check_mbus_joined() + * will prefer them over the !join_mbus cases. 
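The join_mbus ordering comment above works because compute_dbuf_slices() (next hunk) does a first-match scan of an effectively sentinel-terminated table, now keyed on both active_pipes and join_mbus; listing the join_mbus entries first makes them win. A small sketch of that lookup under assumed types (the table layout is illustrative, not the i915 structs):

#include <stdbool.h>
#include <stdint.h>

struct dbuf_entry {
        uint8_t active_pipes;   /* 0 terminates the table */
        bool    join_mbus;
        uint8_t mask;
};

static uint8_t lookup(const struct dbuf_entry *tbl,
                      uint8_t active_pipes, bool join_mbus)
{
        int i;

        /* First match wins, so preferred entries go first. */
        for (i = 0; tbl[i].active_pipes != 0; i++) {
                if (tbl[i].active_pipes == active_pipes &&
                    tbl[i].join_mbus == join_mbus)
                        return tbl[i].mask;
        }
        return 0;
}

int main(void)
{
        static const struct dbuf_entry table[] = {
                { 0x1, true,  0x3 },    /* join_mbus entries come first */
                { 0x1, false, 0x1 },
                { 0 },                  /* sentinel */
        };

        return lookup(table, 0x1, false) == 0x1 ? 0 : 1;
}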
+ */ { .active_pipes = BIT(PIPE_A), .dbuf_mask = { @@ -4731,6 +4735,20 @@ static const struct dbuf_slice_conf_entry adlp_allowed_dbufs[] = { }, .join_mbus = true, }, + { + .active_pipes = BIT(PIPE_A), + .dbuf_mask = { + [PIPE_A] = BIT(DBUF_S1) | BIT(DBUF_S2), + }, + .join_mbus = false, + }, + { + .active_pipes = BIT(PIPE_B), + .dbuf_mask = { + [PIPE_B] = BIT(DBUF_S3) | BIT(DBUF_S4), + }, + .join_mbus = false, + }, { .active_pipes = BIT(PIPE_A) | BIT(PIPE_B), .dbuf_mask = { @@ -4847,13 +4865,14 @@ static bool adlp_check_mbus_joined(u8 active_pipes) return check_mbus_joined(active_pipes, adlp_allowed_dbufs); } -static u8 compute_dbuf_slices(enum pipe pipe, u8 active_pipes, +static u8 compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus, const struct dbuf_slice_conf_entry *dbuf_slices) { int i; for (i = 0; i < dbuf_slices[i].active_pipes; i++) { - if (dbuf_slices[i].active_pipes == active_pipes) + if (dbuf_slices[i].active_pipes == active_pipes && + dbuf_slices[i].join_mbus == join_mbus) return dbuf_slices[i].dbuf_mask[pipe]; } return 0; @@ -4864,7 +4883,7 @@ static u8 compute_dbuf_slices(enum pipe pipe, u8 active_pipes, * returns correspondent DBuf slice mask as stated in BSpec for particular * platform. */ -static u8 icl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes) +static u8 icl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus) { /* * FIXME: For ICL this is still a bit unclear as prev BSpec revision @@ -4878,37 +4897,41 @@ static u8 icl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes) * still here - we will need it once those additional constraints * pop up. */ - return compute_dbuf_slices(pipe, active_pipes, icl_allowed_dbufs); + return compute_dbuf_slices(pipe, active_pipes, join_mbus, + icl_allowed_dbufs); } -static u8 tgl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes) +static u8 tgl_compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus) { - return compute_dbuf_slices(pipe, active_pipes, tgl_allowed_dbufs); + return compute_dbuf_slices(pipe, active_pipes, join_mbus, + tgl_allowed_dbufs); } -static u32 adlp_compute_dbuf_slices(enum pipe pipe, u32 active_pipes) +static u8 adlp_compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus) { - return compute_dbuf_slices(pipe, active_pipes, adlp_allowed_dbufs); + return compute_dbuf_slices(pipe, active_pipes, join_mbus, + adlp_allowed_dbufs); } -static u32 dg2_compute_dbuf_slices(enum pipe pipe, u32 active_pipes) +static u8 dg2_compute_dbuf_slices(enum pipe pipe, u8 active_pipes, bool join_mbus) { - return compute_dbuf_slices(pipe, active_pipes, dg2_allowed_dbufs); + return compute_dbuf_slices(pipe, active_pipes, join_mbus, + dg2_allowed_dbufs); } -static u8 skl_compute_dbuf_slices(struct intel_crtc *crtc, u8 active_pipes) +static u8 skl_compute_dbuf_slices(struct intel_crtc *crtc, u8 active_pipes, bool join_mbus) { struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); enum pipe pipe = crtc->pipe; if (IS_DG2(dev_priv)) - return dg2_compute_dbuf_slices(pipe, active_pipes); + return dg2_compute_dbuf_slices(pipe, active_pipes, join_mbus); else if (IS_ALDERLAKE_P(dev_priv)) - return adlp_compute_dbuf_slices(pipe, active_pipes); + return adlp_compute_dbuf_slices(pipe, active_pipes, join_mbus); else if (DISPLAY_VER(dev_priv) == 12) - return tgl_compute_dbuf_slices(pipe, active_pipes); + return tgl_compute_dbuf_slices(pipe, active_pipes, join_mbus); else if (DISPLAY_VER(dev_priv) == 11) - return icl_compute_dbuf_slices(pipe, active_pipes); + return 
icl_compute_dbuf_slices(pipe, active_pipes, join_mbus); /* * For anything else just return one slice yet. * Should be extended for other platforms. @@ -6127,11 +6150,16 @@ skl_compute_ddb(struct intel_atomic_state *state) return ret; } + if (IS_ALDERLAKE_P(dev_priv)) + new_dbuf_state->joined_mbus = + adlp_check_mbus_joined(new_dbuf_state->active_pipes); + for_each_intel_crtc(&dev_priv->drm, crtc) { enum pipe pipe = crtc->pipe; new_dbuf_state->slices[pipe] = - skl_compute_dbuf_slices(crtc, new_dbuf_state->active_pipes); + skl_compute_dbuf_slices(crtc, new_dbuf_state->active_pipes, + new_dbuf_state->joined_mbus); if (old_dbuf_state->slices[pipe] == new_dbuf_state->slices[pipe]) continue; @@ -6143,9 +6171,6 @@ skl_compute_ddb(struct intel_atomic_state *state) new_dbuf_state->enabled_slices = intel_dbuf_enabled_slices(new_dbuf_state); - if (IS_ALDERLAKE_P(dev_priv)) - new_dbuf_state->joined_mbus = adlp_check_mbus_joined(new_dbuf_state->active_pipes); - if (old_dbuf_state->enabled_slices != new_dbuf_state->enabled_slices || old_dbuf_state->joined_mbus != new_dbuf_state->joined_mbus) { ret = intel_atomic_serialize_global_state(&new_dbuf_state->base); @@ -6626,6 +6651,7 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv) enum pipe pipe = crtc->pipe; unsigned int mbus_offset; enum plane_id plane_id; + u8 slices; skl_pipe_wm_get_hw_state(crtc, &crtc_state->wm.skl.optimal); crtc_state->wm.skl.raw = crtc_state->wm.skl.optimal; @@ -6645,19 +6671,22 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv) skl_ddb_entry_union(&dbuf_state->ddb[pipe], ddb_uv); } - dbuf_state->slices[pipe] = - skl_compute_dbuf_slices(crtc, dbuf_state->active_pipes); - dbuf_state->weight[pipe] = intel_crtc_ddb_weight(crtc_state); /* * Used for checking overlaps, so we need absolute * offsets instead of MBUS relative offsets. 
*/ - mbus_offset = mbus_ddb_offset(dev_priv, dbuf_state->slices[pipe]); + slices = skl_compute_dbuf_slices(crtc, dbuf_state->active_pipes, + dbuf_state->joined_mbus); + mbus_offset = mbus_ddb_offset(dev_priv, slices); crtc_state->wm.skl.ddb.start = mbus_offset + dbuf_state->ddb[pipe].start; crtc_state->wm.skl.ddb.end = mbus_offset + dbuf_state->ddb[pipe].end; + /* The slices actually used by the planes on the pipe */ + dbuf_state->slices[pipe] = + skl_ddb_dbuf_slice_mask(dev_priv, &crtc_state->wm.skl.ddb); + drm_dbg_kms(&dev_priv->drm, "[CRTC:%d:%s] dbuf slices 0x%x, ddb (%d - %d), active pipes 0x%x, mbus joined: %s\n", crtc->base.base.id, crtc->base.name, @@ -6669,6 +6698,74 @@ void skl_wm_get_hw_state(struct drm_i915_private *dev_priv) dbuf_state->enabled_slices = dev_priv->dbuf.enabled_slices; } +static bool skl_dbuf_is_misconfigured(struct drm_i915_private *i915) +{ + const struct intel_dbuf_state *dbuf_state = + to_intel_dbuf_state(i915->dbuf.obj.state); + struct skl_ddb_entry entries[I915_MAX_PIPES] = {}; + struct intel_crtc *crtc; + + for_each_intel_crtc(&i915->drm, crtc) { + const struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + + entries[crtc->pipe] = crtc_state->wm.skl.ddb; + } + + for_each_intel_crtc(&i915->drm, crtc) { + const struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + u8 slices; + + slices = skl_compute_dbuf_slices(crtc, dbuf_state->active_pipes, + dbuf_state->joined_mbus); + if (dbuf_state->slices[crtc->pipe] & ~slices) + return true; + + if (skl_ddb_allocation_overlaps(&crtc_state->wm.skl.ddb, entries, + I915_MAX_PIPES, crtc->pipe)) + return true; + } + + return false; +} + +void skl_wm_sanitize(struct drm_i915_private *i915) +{ + struct intel_crtc *crtc; + + /* + * On TGL/RKL (at least) the BIOS likes to assign the planes + * to the wrong DBUF slices. This will cause an infinite loop + * in skl_commit_modeset_enables() as it can't find a way to + * transition between the old bogus DBUF layout to the new + * proper DBUF layout without DBUF allocation overlaps between + * the planes (which cannot be allowed or else the hardware + * may hang). If we detect a bogus DBUF layout just turn off + * all the planes so that skl_commit_modeset_enables() can + * simply ignore them. 
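skl_dbuf_is_misconfigured() above declares the BIOS layout bogus when a pipe uses slices outside its expected mask or when one pipe's DDB range overlaps another's. For half-open ranges, [start, end) overlaps [start', end') exactly when each starts before the other ends; a minimal sketch, with the struct and the empty-range guard as assumptions rather than the driver's skl_ddb_entry helpers:

#include <stdbool.h>

struct ddb_range {
        int start, end;         /* half-open: [start, end) */
};

static bool ranges_overlap(const struct ddb_range *a,
                           const struct ddb_range *b)
{
        /* An empty allocation (start == end) never overlaps anything. */
        if (a->start >= a->end || b->start >= b->end)
                return false;

        return a->start < b->end && b->start < a->end;
}

static bool any_overlap(const struct ddb_range *r, int n, int skip)
{
        for (int i = 0; i < n; i++) {
                if (i != skip && ranges_overlap(&r[i], &r[skip]))
                        return true;
        }
        return false;
}

int main(void)
{
        struct ddb_range r[3] = { { 0, 4 }, { 4, 8 }, { 2, 6 } };

        /* pipe 2's range straddles the other two, so this reports overlap */
        return any_overlap(r, 3, 2) ? 0 : 1;
}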
+ */ + if (!skl_dbuf_is_misconfigured(i915)) + return; + + drm_dbg_kms(&i915->drm, "BIOS has misprogrammed the DBUF, disabling all planes\n"); + + for_each_intel_crtc(&i915->drm, crtc) { + struct intel_plane *plane = to_intel_plane(crtc->base.primary); + const struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); + struct intel_crtc_state *crtc_state = + to_intel_crtc_state(crtc->base.state); + + if (plane_state->uapi.visible) + intel_plane_disable_noatomic(crtc, plane); + + drm_WARN_ON(&i915->drm, crtc_state->active_planes != 0); + + memset(&crtc_state->wm.skl.ddb, 0, sizeof(crtc_state->wm.skl.ddb)); + } +} + static void ilk_pipe_wm_get_hw_state(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; diff --git a/drivers/gpu/drm/i915/intel_pm.h b/drivers/gpu/drm/i915/intel_pm.h index 990cdcaf85cec5d9c90c507bbc5d7dfdf2af82bc..d2243653a89300068baba4fec9b763792eea9e68 100644 --- a/drivers/gpu/drm/i915/intel_pm.h +++ b/drivers/gpu/drm/i915/intel_pm.h @@ -47,6 +47,7 @@ void skl_pipe_wm_get_hw_state(struct intel_crtc *crtc, struct skl_pipe_wm *out); void g4x_wm_sanitize(struct drm_i915_private *dev_priv); void vlv_wm_sanitize(struct drm_i915_private *dev_priv); +void skl_wm_sanitize(struct drm_i915_private *dev_priv); bool intel_can_enable_sagv(struct drm_i915_private *dev_priv, const struct intel_bw_state *bw_state); void intel_sagv_pre_plane_update(struct intel_atomic_state *state); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 22dab36afcb62c46dca82f9eb9628886b5a30dd2..64c2708efc9ecc0a30f5b9bb81906d586c70e595 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -68,6 +68,7 @@ static noinline depot_stack_handle_t __save_depot_stack(void) static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { spin_lock_init(&rpm->debug.lock); + stack_depot_init(); } static noinline depot_stack_handle_t diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index fc25ebf1a593e2b01320eba1a981016f238affee..778da3179b3cc8325b17f828b8bbb6f594a80d0c 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -724,7 +724,8 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore, } static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, - enum forcewake_domains fw_domains) + enum forcewake_domains fw_domains, + bool delayed) { struct intel_uncore_forcewake_domain *domain; unsigned int tmp; @@ -739,7 +740,11 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, continue; } - fw_domains_put(uncore, domain->mask); + if (delayed && + !(domain->uncore->fw_domains_timer & domain->mask)) + fw_domain_arm_timer(domain); + else + fw_domains_put(uncore, domain->mask); } } @@ -760,7 +765,20 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore, return; spin_lock_irqsave(&uncore->lock, irqflags); - __intel_uncore_forcewake_put(uncore, fw_domains); + __intel_uncore_forcewake_put(uncore, fw_domains, false); + spin_unlock_irqrestore(&uncore->lock, irqflags); +} + +void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore, + enum forcewake_domains fw_domains) +{ + unsigned long irqflags; + + if (!uncore->fw_get_funcs) + return; + + spin_lock_irqsave(&uncore->lock, irqflags); + __intel_uncore_forcewake_put(uncore, fw_domains, true); spin_unlock_irqrestore(&uncore->lock, irqflags); } @@ -802,7 +820,7 @@ void 
intel_uncore_forcewake_put__locked(struct intel_uncore *uncore, if (!uncore->fw_get_funcs) return; - __intel_uncore_forcewake_put(uncore, fw_domains); + __intel_uncore_forcewake_put(uncore, fw_domains, false); } void assert_forcewakes_inactive(struct intel_uncore *uncore) diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index 210fe2a7161298cacb41f99a50c50a54e684026b..2a15b2b2e2fcb9aae8e6c9d73c9fb74ffe579387 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -246,6 +246,8 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore, enum forcewake_domains domains); void intel_uncore_forcewake_put(struct intel_uncore *uncore, enum forcewake_domains domains); +void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore, + enum forcewake_domains domains); void intel_uncore_forcewake_flush(struct intel_uncore *uncore, enum forcewake_domains fw_domains); diff --git a/drivers/gpu/drm/kmb/kmb_plane.c b/drivers/gpu/drm/kmb/kmb_plane.c index 00404ba4126ddcb787fdbc939755578c23875674..2735b8eb35376eaa2f9a751c01c120dabec9be3c 100644 --- a/drivers/gpu/drm/kmb/kmb_plane.c +++ b/drivers/gpu/drm/kmb/kmb_plane.c @@ -158,12 +158,6 @@ static void kmb_plane_atomic_disable(struct drm_plane *plane, case LAYER_1: kmb->plane_status[plane_id].ctrl = LCD_CTRL_VL2_ENABLE; break; - case LAYER_2: - kmb->plane_status[plane_id].ctrl = LCD_CTRL_GL1_ENABLE; - break; - case LAYER_3: - kmb->plane_status[plane_id].ctrl = LCD_CTRL_GL2_ENABLE; - break; } kmb->plane_status[plane_id].disable = true; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 51b83776951b042270d3c97ae31fa6014b67c15f..17cfad6424db6b9946c2e2e6b0f69c17488acaf4 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1560,6 +1560,8 @@ static int a6xx_pm_suspend(struct msm_gpu *gpu) for (i = 0; i < gpu->nr_rings; i++) a6xx_gpu->shadow[i] = 0; + gpu->suspend_count++; + return 0; } diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 93005839b5da1cd49de8f2c7d7efc94eb1b27bce..fb261930ad1c70ad63c8af5783ec8246b75448bc 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -608,9 +608,27 @@ static int adreno_resume(struct device *dev) return gpu->funcs->pm_resume(gpu); } +static int active_submits(struct msm_gpu *gpu) +{ + int active_submits; + mutex_lock(&gpu->active_lock); + active_submits = gpu->active_submits; + mutex_unlock(&gpu->active_lock); + return active_submits; +} + static int adreno_suspend(struct device *dev) { struct msm_gpu *gpu = dev_to_gpu(dev); + int remaining; + + remaining = wait_event_timeout(gpu->retire_event, + active_submits(gpu) == 0, + msecs_to_jiffies(1000)); + if (remaining == 0) { + dev_err(dev, "Timeout waiting for GPU to suspend\n"); + return -EBUSY; + } return gpu->funcs->pm_suspend(gpu); } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c index a98e964c3b6fa6941691dbf0ed81fa52378b5713..355894a3b48c376612e2491bbbb5f88b8be3eb4e 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c @@ -26,9 +26,16 @@ static void dpu_setup_dspp_pcc(struct dpu_hw_dspp *ctx, struct dpu_hw_pcc_cfg *cfg) { - u32 base = ctx->cap->sblk->pcc.base; + u32 base; - if (!ctx || !base) { + if (!ctx) { + DRM_ERROR("invalid ctx %pK\n", ctx); + return; + } + + base = 
ctx->cap->sblk->pcc.base; + + if (!base) { DRM_ERROR("invalid ctx %pK pcc base 0x%x\n", ctx, base); return; } diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c index 052548883d27d2b32bd74e5d0a94ade3f359830a..0fe02529b5e7a34ea2900859165c5fdaf9b5369a 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.c +++ b/drivers/gpu/drm/msm/dsi/dsi.c @@ -40,7 +40,12 @@ static int dsi_get_phy(struct msm_dsi *msm_dsi) of_node_put(phy_node); - if (!phy_pdev || !msm_dsi->phy) { + if (!phy_pdev) { + DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__); + return -EPROBE_DEFER; + } + if (!msm_dsi->phy) { + put_device(&phy_pdev->dev); DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__); return -EPROBE_DEFER; } diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index c2ed177717c74c0552a97bc68d7e65b2971be41f..2027b38617ab2d11487b4c62a236def96d5793a0 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -808,12 +808,14 @@ int msm_dsi_phy_enable(struct msm_dsi_phy *phy, struct msm_dsi_phy_clk_request *clk_req, struct msm_dsi_phy_shared_timings *shared_timings) { - struct device *dev = &phy->pdev->dev; + struct device *dev; int ret; if (!phy || !phy->cfg->ops.enable) return -EINVAL; + dev = &phy->pdev->dev; + ret = dsi_phy_enable_resource(phy); if (ret) { DRM_DEV_ERROR(dev, "%s: resource enable failed, %d\n", diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 3acdeae25caf09e0c093132b4572a012c57f81e0..719720709e9e754316580bf1eafcb62b08af5d1f 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -97,10 +97,15 @@ static int msm_hdmi_get_phy(struct hdmi *hdmi) of_node_put(phy_node); - if (!phy_pdev || !hdmi->phy) { + if (!phy_pdev) { DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n"); return -EPROBE_DEFER; } + if (!hdmi->phy) { + DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n"); + put_device(&phy_pdev->dev); + return -EPROBE_DEFER; + } hdmi->phy_dev = get_device(&phy_pdev->dev); diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index ad35a5d940531792ab918bc052f6b05a30ff49d2..555666e3f9603a2305a44c84453a56ceda2b9abf 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -461,7 +461,7 @@ static int msm_init_vram(struct drm_device *dev) of_node_put(node); if (ret) return ret; - size = r.end - r.start; + size = r.end - r.start + 1; DRM_INFO("using VRAM carveout: %lx@%pa\n", size, &r.start); /* if we have no IOMMU, then we need to use carveout allocator. 
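The msm_init_vram() fix above reflects that the memory range parsed here is inclusive of both endpoints, so the byte count is end - start + 1; the old code under-counted by one. A tiny self-contained illustration with made-up addresses:

#include <stdint.h>
#include <stdio.h>

struct range {
        uint64_t start, end;    /* inclusive on both ends */
};

int main(void)
{
        struct range r = { .start = 0x10000000, .end = 0x10ffffff };

        /* Inclusive bounds: a full 16 MiB carveout, not 16 MiB - 1. */
        uint64_t size = r.end - r.start + 1;

        printf("size = 0x%llx\n", (unsigned long long)size);
        return 0;
}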
@@ -510,7 +510,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) struct msm_drm_private *priv = dev_get_drvdata(dev); struct drm_device *ddev; struct msm_kms *kms; - struct msm_mdss *mdss; int ret, i; ddev = drm_dev_alloc(drv, dev); @@ -521,8 +520,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv) ddev->dev_private = priv; priv->dev = ddev; - mdss = priv->mdss; - priv->wq = alloc_ordered_workqueue("msm", 0); priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD; diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 0f78c26152722173251cce26f68222501dad944a..2c1049c0ea14c06bb229faf87db624f57fbc36a4 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -703,6 +703,8 @@ static void retire_submits(struct msm_gpu *gpu) } } } + + wake_up_all(&gpu->retire_event); } static void retire_worker(struct kthread_work *work) @@ -848,6 +850,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, INIT_LIST_HEAD(&gpu->active_list); mutex_init(&gpu->active_lock); mutex_init(&gpu->lock); + init_waitqueue_head(&gpu->retire_event); kthread_init_work(&gpu->retire_work, retire_worker); kthread_init_work(&gpu->recover_work, recover_worker); kthread_init_work(&gpu->fault_work, fault_worker); diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 445c6bfd4b6b740c8568953f98f5d384ec4fccca..92aa1e9196c64b483ac75e74d882a40ad56da1f6 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -230,6 +230,9 @@ struct msm_gpu { /* work for handling GPU recovery: */ struct kthread_work recover_work; + /** retire_event: notified when submits are retired: */ + wait_queue_head_t retire_event; + /* work for handling active-list retiring: */ struct kthread_work retire_work; diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c index 62405e98092508b81b14d86dd67df58fbda2c5fd..9bf319be11f6035cf01b092cd4e689b868eaca1e 100644 --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c @@ -133,6 +133,18 @@ void msm_devfreq_init(struct msm_gpu *gpu) CLOCK_MONOTONIC, HRTIMER_MODE_REL); } +static void cancel_idle_work(struct msm_gpu_devfreq *df) +{ + hrtimer_cancel(&df->idle_work.timer); + kthread_cancel_work_sync(&df->idle_work.work); +} + +static void cancel_boost_work(struct msm_gpu_devfreq *df) +{ + hrtimer_cancel(&df->boost_work.timer); + kthread_cancel_work_sync(&df->boost_work.work); +} + void msm_devfreq_cleanup(struct msm_gpu *gpu) { struct msm_gpu_devfreq *df = &gpu->devfreq; @@ -152,7 +164,12 @@ void msm_devfreq_resume(struct msm_gpu *gpu) void msm_devfreq_suspend(struct msm_gpu *gpu) { - devfreq_suspend_device(gpu->devfreq.devfreq); + struct msm_gpu_devfreq *df = &gpu->devfreq; + + devfreq_suspend_device(df->devfreq); + + cancel_idle_work(df); + cancel_boost_work(df); } static void msm_devfreq_boost_work(struct kthread_work *work) @@ -196,7 +213,7 @@ void msm_devfreq_active(struct msm_gpu *gpu) /* * Cancel any pending transition to idle frequency: */ - hrtimer_cancel(&df->idle_work.timer); + cancel_idle_work(df); idle_time = ktime_to_ms(ktime_sub(ktime_get(), df->idle_time)); diff --git a/drivers/gpu/drm/mxsfb/mxsfb_kms.c b/drivers/gpu/drm/mxsfb/mxsfb_kms.c index 0655582ae8ed64f2e7eb822a229f69b992aae69a..4cfb6c001679924801bdd37ffdb1455f753409dc 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_kms.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_kms.c @@ -361,7 +361,11 @@ static void 
mxsfb_crtc_atomic_enable(struct drm_crtc *crtc, bridge_state = drm_atomic_get_new_bridge_state(state, mxsfb->bridge); - bus_format = bridge_state->input_bus_cfg.format; + if (!bridge_state) + bus_format = MEDIA_BUS_FMT_FIXED; + else + bus_format = bridge_state->input_bus_cfg.format; + if (bus_format == MEDIA_BUS_FMT_FIXED) { dev_warn_once(drm->dev, "Bridge does not provide bus format, assuming MEDIA_BUS_FMT_RGB888_1X24.\n" diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c index d0f52d59fc2f9f6eef9ef79e123ace125c9e1bc4..64e423dddd9e7fe01209838e391a7e4761a6183c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c @@ -38,7 +38,7 @@ nvbios_addr(struct nvkm_bios *bios, u32 *addr, u8 size) *addr += bios->imaged_addr; } - if (unlikely(*addr + size >= bios->size)) { + if (unlikely(*addr + size > bios->size)) { nvkm_error(&bios->subdev, "OOB %d %08x %08x\n", size, p, *addr); return false; } diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 9e46db5e359c7a16f563476f2528b81d6c77db7b..3c08f9827acf1935efbe0ee0f61c07a6d246cce0 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -588,6 +588,7 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) err = panel_dpi_probe(dev, panel); if (err) goto free_ddc; + desc = panel->desc; } else { if (!of_get_display_timing(dev->of_node, "panel-timing", &dt)) panel_simple_parse_panel_timing_node(dev, panel, &dt); diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index 830bdd5e9b7ce16cbb033a33969800fc47292a78..8677c82716784ae5386b29644e3dc23fbc3944c9 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -529,13 +529,6 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master, return ret; } - ret = clk_prepare_enable(hdmi->vpll_clk); - if (ret) { - DRM_DEV_ERROR(hdmi->dev, "Failed to enable HDMI vpll: %d\n", - ret); - return ret; - } - hdmi->phy = devm_phy_optional_get(dev, "hdmi"); if (IS_ERR(hdmi->phy)) { ret = PTR_ERR(hdmi->phy); @@ -544,6 +537,13 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master, return ret; } + ret = clk_prepare_enable(hdmi->vpll_clk); + if (ret) { + DRM_DEV_ERROR(hdmi->dev, "Failed to enable HDMI vpll: %d\n", + ret); + return ret; + } + drm_encoder_helper_add(encoder, &dw_hdmi_rockchip_encoder_helper_funcs); drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS); diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c index 1f7353f0684a0e472a6c941ef0456e677908b5c6..798b542e59164c5d7b1e05b3e517bed0732ba77e 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c @@ -902,6 +902,7 @@ static const struct vop_win_phy rk3399_win01_data = { .enable = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 0), .format = VOP_REG(RK3288_WIN0_CTRL0, 0x7, 1), .rb_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 12), + .x_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 21), .y_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 22), .act_info = VOP_REG(RK3288_WIN0_ACT_INFO, 0x1fff1fff, 0), .dsp_info = VOP_REG(RK3288_WIN0_DSP_INFO, 0x0fff0fff, 0), @@ -912,6 +913,7 @@ static const struct vop_win_phy rk3399_win01_data = { .uv_vir = VOP_REG(RK3288_WIN0_VIR, 0x3fff, 16), .src_alpha_ctl = VOP_REG(RK3288_WIN0_SRC_ALPHA_CTRL, 
0xff, 0), .dst_alpha_ctl = VOP_REG(RK3288_WIN0_DST_ALPHA_CTRL, 0xff, 0), + .channel = VOP_REG(RK3288_WIN0_CTRL2, 0xff, 0), }; /* @@ -922,11 +924,11 @@ static const struct vop_win_phy rk3399_win01_data = { static const struct vop_win_data rk3399_vop_win_data[] = { { .base = 0x00, .phy = &rk3399_win01_data, .type = DRM_PLANE_TYPE_PRIMARY }, - { .base = 0x40, .phy = &rk3288_win01_data, + { .base = 0x40, .phy = &rk3368_win01_data, .type = DRM_PLANE_TYPE_OVERLAY }, - { .base = 0x00, .phy = &rk3288_win23_data, + { .base = 0x00, .phy = &rk3368_win23_data, .type = DRM_PLANE_TYPE_OVERLAY }, - { .base = 0x50, .phy = &rk3288_win23_data, + { .base = 0x50, .phy = &rk3368_win23_data, .type = DRM_PLANE_TYPE_CURSOR }, }; diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 287dbc89ad642eb63af4ef2682f831a26d49e577..e6cc47470e03094f42e02f676198399d16d70205 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -671,7 +671,6 @@ static int vc4_crtc_atomic_check(struct drm_crtc *crtc, const struct drm_display_mode *mode = &crtc_state->adjusted_mode; struct vc4_encoder *vc4_encoder = to_vc4_encoder(encoder); - mode = &crtc_state->adjusted_mode; if (vc4_encoder->type == VC4_ENCODER_TYPE_HDMI0) { vc4_state->hvs_load = max(mode->clock * mode->hdisplay / mode->htotal + 1000, mode->clock * 9 / 10) * 1000; diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index a229da58962a2ec458b680759cc8d878d52a87e8..9300d3354c512af8aecf8d5ab35ef5885fea0369 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -1262,7 +1262,6 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host, struct mipi_dsi_device *device) { struct vc4_dsi *dsi = host_to_dsi(host); - int ret; dsi->lanes = device->lanes; dsi->channel = device->channel; @@ -1297,18 +1296,15 @@ static int vc4_dsi_host_attach(struct mipi_dsi_host *host, return 0; } - ret = component_add(&dsi->pdev->dev, &vc4_dsi_ops); - if (ret) { - mipi_dsi_host_unregister(&dsi->dsi_host); - return ret; - } - - return 0; + return component_add(&dsi->pdev->dev, &vc4_dsi_ops); } static int vc4_dsi_host_detach(struct mipi_dsi_host *host, struct mipi_dsi_device *device) { + struct vc4_dsi *dsi = host_to_dsi(host); + + component_del(&dsi->pdev->dev, &vc4_dsi_ops); return 0; } @@ -1686,9 +1682,7 @@ static int vc4_dsi_dev_remove(struct platform_device *pdev) struct device *dev = &pdev->dev; struct vc4_dsi *dsi = dev_get_drvdata(dev); - component_del(&pdev->dev, &vc4_dsi_ops); mipi_dsi_host_unregister(&dsi->dsi_host); - return 0; } diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 053fbaf765ca6f69ce9d9c5efde88a5c9febcc2f..b30500405fa79ddd9bfc92b215f482d4ee7a0e2c 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -196,14 +196,8 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) if (gpiod_get_value_cansleep(vc4_hdmi->hpd_gpio)) connected = true; } else { - unsigned long flags; - u32 hotplug; - - spin_lock_irqsave(&vc4_hdmi->hw_lock, flags); - hotplug = HDMI_READ(HDMI_HOTPLUG); - spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags); - - if (hotplug & VC4_HDMI_HOTPLUG_CONNECTED) + if (vc4_hdmi->variant->hp_detect && + vc4_hdmi->variant->hp_detect(vc4_hdmi)) connected = true; } @@ -1251,6 +1245,7 @@ static int vc4_hdmi_encoder_atomic_check(struct drm_encoder *encoder, unsigned long long tmds_rate; if (vc4_hdmi->variant->unsupported_odd_h_timings && + !(mode->flags & DRM_MODE_FLAG_DBLCLK) && ((mode->hdisplay 
% 2) || (mode->hsync_start % 2) || (mode->hsync_end % 2) || (mode->htotal % 2))) return -EINVAL; @@ -1298,6 +1293,7 @@ vc4_hdmi_encoder_mode_valid(struct drm_encoder *encoder, struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); if (vc4_hdmi->variant->unsupported_odd_h_timings && + !(mode->flags & DRM_MODE_FLAG_DBLCLK) && ((mode->hdisplay % 2) || (mode->hsync_start % 2) || (mode->hsync_end % 2) || (mode->htotal % 2))) return MODE_H_ILLEGAL; @@ -1343,6 +1339,18 @@ static u32 vc5_hdmi_channel_map(struct vc4_hdmi *vc4_hdmi, u32 channel_mask) return channel_map; } +static bool vc5_hdmi_hp_detect(struct vc4_hdmi *vc4_hdmi) +{ + unsigned long flags; + u32 hotplug; + + spin_lock_irqsave(&vc4_hdmi->hw_lock, flags); + hotplug = HDMI_READ(HDMI_HOTPLUG); + spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags); + + return !!(hotplug & VC4_HDMI_HOTPLUG_CONNECTED); +} + /* HDMI audio codec callbacks */ static void vc4_hdmi_audio_set_mai_clock(struct vc4_hdmi *vc4_hdmi, unsigned int samplerate) @@ -2504,7 +2512,8 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) * vc4_hdmi_disable_scrambling() will thus run at boot, make * sure it's disabled, and avoid any inconsistency. */ - vc4_hdmi->scdc_enabled = true; + if (variant->max_pixel_clock > HDMI_14_MAX_TMDS_CLK) + vc4_hdmi->scdc_enabled = true; ret = variant->init_resources(vc4_hdmi); if (ret) @@ -2723,6 +2732,7 @@ static const struct vc4_hdmi_variant bcm2711_hdmi0_variant = { .phy_rng_disable = vc5_hdmi_phy_rng_disable, .channel_map = vc5_hdmi_channel_map, .supports_hdr = true, + .hp_detect = vc5_hdmi_hp_detect, }; static const struct vc4_hdmi_variant bcm2711_hdmi1_variant = { @@ -2751,6 +2761,7 @@ static const struct vc4_hdmi_variant bcm2711_hdmi1_variant = { .phy_rng_disable = vc5_hdmi_phy_rng_disable, .channel_map = vc5_hdmi_channel_map, .supports_hdr = true, + .hp_detect = vc5_hdmi_hp_detect, }; static const struct of_device_id vc4_hdmi_dt_match[] = { diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.h b/drivers/gpu/drm/vc4/vc4_hdmi.h index 36c0b082a43b274e8d5da18a4fc8e8e4e0c32ed5..31b77a94c526deaaf3470e3667e8f65e52a7b5f3 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.h +++ b/drivers/gpu/drm/vc4/vc4_hdmi.h @@ -102,6 +102,9 @@ struct vc4_hdmi_variant { /* Enables HDR metadata */ bool supports_hdr; + + /* Callback for hardware specific hotplug detect */ + bool (*hp_detect)(struct vc4_hdmi *vc4_hdmi); }; /* HDMI audio information */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index d6b66636a19b8327b2f661625630a9da5f629686..ea3ecdda561dc3fe9a2e565e1273b167f138edb7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1140,15 +1140,14 @@ extern int vmw_execbuf_fence_commands(struct drm_file *file_priv, struct vmw_private *dev_priv, struct vmw_fence_obj **p_fence, uint32_t *p_handle); -extern void vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, +extern int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, struct vmw_fpriv *vmw_fp, int ret, struct drm_vmw_fence_rep __user *user_fence_rep, struct vmw_fence_obj *fence, uint32_t fence_handle, - int32_t out_fence_fd, - struct sync_file *sync_file); + int32_t out_fence_fd); bool vmw_cmd_describe(const void *buf, u32 *size, char const **cmd); /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 44ca23b0ea4eede7b33d1b26a087e0e8abe199bc..dd2ff441068eca222553471f740e4e49e3aa8055 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ 
b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -3879,17 +3879,17 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv, * Also if copying fails, user-space will be unable to signal the fence object * so we wait for it immediately, and then unreference the user-space reference. */ -void +int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, struct vmw_fpriv *vmw_fp, int ret, struct drm_vmw_fence_rep __user *user_fence_rep, struct vmw_fence_obj *fence, uint32_t fence_handle, - int32_t out_fence_fd, struct sync_file *sync_file) + int32_t out_fence_fd) { struct drm_vmw_fence_rep fence_rep; if (user_fence_rep == NULL) - return; + return 0; memset(&fence_rep, 0, sizeof(fence_rep)); @@ -3917,19 +3917,13 @@ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, * handle. */ if (unlikely(ret != 0) && (fence_rep.error == 0)) { - if (sync_file) - fput(sync_file->file); - - if (fence_rep.fd != -1) { - put_unused_fd(fence_rep.fd); - fence_rep.fd = -1; - } - ttm_ref_object_base_unref(vmw_fp->tfile, fence_handle); VMW_DEBUG_USER("Fence copy error. Syncing.\n"); (void) vmw_fence_obj_wait(fence, false, false, VMW_FENCE_WAIT_TIMEOUT); } + + return ret ? -EFAULT : 0; } /** @@ -4266,16 +4260,23 @@ int vmw_execbuf_process(struct drm_file *file_priv, (void) vmw_fence_obj_wait(fence, false, false, VMW_FENCE_WAIT_TIMEOUT); + } + } + + ret = vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret, + user_fence_rep, fence, handle, out_fence_fd); + + if (sync_file) { + if (ret) { + /* usercopy of fence failed, put the file object */ + fput(sync_file->file); + put_unused_fd(out_fence_fd); } else { /* Link the fence with the FD created earlier */ fd_install(out_fence_fd, sync_file->file); } } - vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret, - user_fence_rep, fence, handle, out_fence_fd, - sync_file); - /* Don't unreference when handing fence out */ if (unlikely(out_fence != NULL)) { *out_fence = fence; @@ -4293,7 +4294,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, */ vmw_validation_unref_lists(&val_ctx); - return 0; + return ret; out_unlock_binding: mutex_unlock(&dev_priv->binding_mutex); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index c60d395f9e2efc37ad2446e005fad15b8c5edbfa..5001b87aebe8142600ae3e851668e2b06fcb5a04 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -1128,7 +1128,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, } vmw_execbuf_copy_fence_user(dev_priv, vmw_fp, 0, user_fence_rep, fence, - handle, -1, NULL); + handle, -1); vmw_fence_obj_unreference(&fence); return 0; out_no_create: diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 4e693e8de2c388378f527791a86648728e5c7ca8..bbd2f4ec08ec13f95e718d6434b3d11664f6f8f2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2501,7 +2501,7 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv, if (file_priv) vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret, user_fence_rep, fence, - handle, -1, NULL); + handle, -1); if (out_fence) *out_fence = fence; else diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index f5544157576c97fdec7fd72d4dc8de0b8480ddd4..9c0e45f56d84efb316d75262701201df717aca79 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -128,6 +128,8 @@ config HID_ACRUX_FF config HID_APPLE tristate "Apple {i,Power,Mac}Books" depends on HID + depends on LEDS_CLASS + 
depends on NEW_LEDS default !EXPERT help Support for some Apple devices which more or less break diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 7dc89dc6b0f0eb6f5fa3792c285bbf0b8ac798c5..0cf35caee9fa048480146c0cdcad0eac7c466280 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -7,6 +7,7 @@ * Copyright (c) 2005 Michael Haboustak for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina * Copyright (c) 2008 Jiri Slaby + * Copyright (c) 2019 Paul Pawlowski */ /* @@ -33,6 +34,7 @@ /* BIT(7) reserved, was: APPLE_IGNORE_HIDINPUT */ #define APPLE_NUMLOCK_EMULATION BIT(8) #define APPLE_RDESC_BATTERY BIT(9) +#define APPLE_BACKLIGHT_CTL BIT(10) #define APPLE_FLAG_FKEY 0x01 @@ -61,6 +63,12 @@ MODULE_PARM_DESC(swap_fn_leftctrl, "Swap the Fn and left Control keys. " "(For people who want to keep PC keyboard muscle memory. " "[0] = as-is, Mac layout, 1 = swapped, PC layout)"); +struct apple_sc_backlight { + struct led_classdev cdev; + struct hid_device *hdev; + unsigned short backlight_off, backlight_on_min, backlight_on_max; +}; + struct apple_sc { struct hid_device *hdev; unsigned long quirks; @@ -68,6 +76,7 @@ struct apple_sc { unsigned int fn_found; DECLARE_BITMAP(pressed_numlock, KEY_CNT); struct timer_list battery_timer; + struct apple_sc_backlight *backlight; }; struct apple_key_translation { @@ -76,6 +85,61 @@ struct apple_key_translation { u8 flags; }; +static const struct apple_key_translation magic_keyboard_alu_fn_keys[] = { + { KEY_BACKSPACE, KEY_DELETE }, + { KEY_ENTER, KEY_INSERT }, + { KEY_F1, KEY_BRIGHTNESSDOWN, APPLE_FLAG_FKEY }, + { KEY_F2, KEY_BRIGHTNESSUP, APPLE_FLAG_FKEY }, + { KEY_F3, KEY_SCALE, APPLE_FLAG_FKEY }, + { KEY_F4, KEY_DASHBOARD, APPLE_FLAG_FKEY }, + { KEY_F6, KEY_NUMLOCK, APPLE_FLAG_FKEY }, + { KEY_F7, KEY_PREVIOUSSONG, APPLE_FLAG_FKEY }, + { KEY_F8, KEY_PLAYPAUSE, APPLE_FLAG_FKEY }, + { KEY_F9, KEY_NEXTSONG, APPLE_FLAG_FKEY }, + { KEY_F10, KEY_MUTE, APPLE_FLAG_FKEY }, + { KEY_F11, KEY_VOLUMEDOWN, APPLE_FLAG_FKEY }, + { KEY_F12, KEY_VOLUMEUP, APPLE_FLAG_FKEY }, + { KEY_UP, KEY_PAGEUP }, + { KEY_DOWN, KEY_PAGEDOWN }, + { KEY_LEFT, KEY_HOME }, + { KEY_RIGHT, KEY_END }, + { } +}; + +static const struct apple_key_translation magic_keyboard_2015_fn_keys[] = { + { KEY_BACKSPACE, KEY_DELETE }, + { KEY_ENTER, KEY_INSERT }, + { KEY_F1, KEY_BRIGHTNESSDOWN, APPLE_FLAG_FKEY }, + { KEY_F2, KEY_BRIGHTNESSUP, APPLE_FLAG_FKEY }, + { KEY_F3, KEY_SCALE, APPLE_FLAG_FKEY }, + { KEY_F4, KEY_DASHBOARD, APPLE_FLAG_FKEY }, + { KEY_F7, KEY_PREVIOUSSONG, APPLE_FLAG_FKEY }, + { KEY_F8, KEY_PLAYPAUSE, APPLE_FLAG_FKEY }, + { KEY_F9, KEY_NEXTSONG, APPLE_FLAG_FKEY }, + { KEY_F10, KEY_MUTE, APPLE_FLAG_FKEY }, + { KEY_F11, KEY_VOLUMEDOWN, APPLE_FLAG_FKEY }, + { KEY_F12, KEY_VOLUMEUP, APPLE_FLAG_FKEY }, + { KEY_UP, KEY_PAGEUP }, + { KEY_DOWN, KEY_PAGEDOWN }, + { KEY_LEFT, KEY_HOME }, + { KEY_RIGHT, KEY_END }, + { } +}; + +struct apple_backlight_config_report { + u8 report_id; + u8 version; + u16 backlight_off, backlight_on_min, backlight_on_max; +}; + +struct apple_backlight_set_report { + u8 report_id; + u8 version; + u16 backlight; + u16 rate; +}; + + static const struct apple_key_translation apple2021_fn_keys[] = { { KEY_BACKSPACE, KEY_DELETE }, { KEY_ENTER, KEY_INSERT }, @@ -119,6 +183,51 @@ static const struct apple_key_translation macbookair_fn_keys[] = { { } }; +static const struct apple_key_translation macbookpro_no_esc_fn_keys[] = { + { KEY_BACKSPACE, KEY_DELETE }, + { KEY_ENTER, KEY_INSERT }, + { KEY_GRAVE, KEY_ESC }, + { KEY_1, KEY_F1 }, + { KEY_2,
KEY_F2 }, + { KEY_3, KEY_F3 }, + { KEY_4, KEY_F4 }, + { KEY_5, KEY_F5 }, + { KEY_6, KEY_F6 }, + { KEY_7, KEY_F7 }, + { KEY_8, KEY_F8 }, + { KEY_9, KEY_F9 }, + { KEY_0, KEY_F10 }, + { KEY_MINUS, KEY_F11 }, + { KEY_EQUAL, KEY_F12 }, + { KEY_UP, KEY_PAGEUP }, + { KEY_DOWN, KEY_PAGEDOWN }, + { KEY_LEFT, KEY_HOME }, + { KEY_RIGHT, KEY_END }, + { } +}; + +static const struct apple_key_translation macbookpro_dedicated_esc_fn_keys[] = { + { KEY_BACKSPACE, KEY_DELETE }, + { KEY_ENTER, KEY_INSERT }, + { KEY_1, KEY_F1 }, + { KEY_2, KEY_F2 }, + { KEY_3, KEY_F3 }, + { KEY_4, KEY_F4 }, + { KEY_5, KEY_F5 }, + { KEY_6, KEY_F6 }, + { KEY_7, KEY_F7 }, + { KEY_8, KEY_F8 }, + { KEY_9, KEY_F9 }, + { KEY_0, KEY_F10 }, + { KEY_MINUS, KEY_F11 }, + { KEY_EQUAL, KEY_F12 }, + { KEY_UP, KEY_PAGEUP }, + { KEY_DOWN, KEY_PAGEDOWN }, + { KEY_LEFT, KEY_HOME }, + { KEY_RIGHT, KEY_END }, + { } +}; + static const struct apple_key_translation apple_fn_keys[] = { { KEY_BACKSPACE, KEY_DELETE }, { KEY_ENTER, KEY_INSERT }, @@ -202,6 +311,15 @@ static const struct apple_key_translation swapped_fn_leftctrl_keys[] = { { } }; +static inline void apple_setup_key_translation(struct input_dev *input, + const struct apple_key_translation *table) +{ + const struct apple_key_translation *trans; + + for (trans = table; trans->from; trans++) + set_bit(trans->to, input->keybit); +} + static const struct apple_key_translation *apple_find_translation( const struct apple_key_translation *table, u16 from) { @@ -242,10 +360,34 @@ static int hidinput_apple_event(struct hid_device *hid, struct input_dev *input, } if (fnmode) { - if (hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021 || - hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021 || - hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021) + if (hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI || + hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO || + hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_JIS || + hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI || + hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO || + hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS || + hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ANSI || + hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_ISO || + hid->product == USB_DEVICE_ID_APPLE_ALU_WIRELESS_2011_JIS) + table = magic_keyboard_alu_fn_keys; + else if (hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2015 || + hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2015) + table = magic_keyboard_2015_fn_keys; + else if (hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021 || + hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021 || + hid->product == USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021) table = apple2021_fn_keys; + else if (hid->product == USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132 || + hid->product == USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680 || + hid->product == USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213) + table = macbookpro_no_esc_fn_keys; + else if (hid->product == USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K || + hid->product == USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223 || + hid->product == USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F) + table = macbookpro_dedicated_esc_fn_keys; + else if (hid->product == USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K || + hid->product == USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K) + table = apple_fn_keys; else if (hid->product >= USB_DEVICE_ID_APPLE_WELLSPRING4_ANSI && hid->product <= USB_DEVICE_ID_APPLE_WELLSPRING4A_JIS) table = macbookair_fn_keys; 
@@ -452,30 +594,21 @@ static __u8 *apple_report_fixup(struct hid_device *hdev, __u8 *rdesc, static void apple_setup_input(struct input_dev *input) { - const struct apple_key_translation *trans; - set_bit(KEY_NUMLOCK, input->keybit); /* Enable all needed keys */ - for (trans = apple_fn_keys; trans->from; trans++) - set_bit(trans->to, input->keybit); - - for (trans = powerbook_fn_keys; trans->from; trans++) - set_bit(trans->to, input->keybit); - - for (trans = powerbook_numlock_keys; trans->from; trans++) - set_bit(trans->to, input->keybit); - - for (trans = apple_iso_keyboard; trans->from; trans++) - set_bit(trans->to, input->keybit); - - for (trans = apple2021_fn_keys; trans->from; trans++) - set_bit(trans->to, input->keybit); - - if (swap_fn_leftctrl) { - for (trans = swapped_fn_leftctrl_keys; trans->from; trans++) - set_bit(trans->to, input->keybit); - } + apple_setup_key_translation(input, apple_fn_keys); + apple_setup_key_translation(input, powerbook_fn_keys); + apple_setup_key_translation(input, powerbook_numlock_keys); + apple_setup_key_translation(input, apple_iso_keyboard); + apple_setup_key_translation(input, magic_keyboard_alu_fn_keys); + apple_setup_key_translation(input, magic_keyboard_2015_fn_keys); + apple_setup_key_translation(input, apple2021_fn_keys); + apple_setup_key_translation(input, macbookpro_no_esc_fn_keys); + apple_setup_key_translation(input, macbookpro_dedicated_esc_fn_keys); + + if (swap_fn_leftctrl) + apple_setup_key_translation(input, swapped_fn_leftctrl_keys); } static int apple_input_mapping(struct hid_device *hdev, struct hid_input *hi, @@ -530,6 +663,105 @@ static int apple_input_configured(struct hid_device *hdev, return 0; } +static bool apple_backlight_check_support(struct hid_device *hdev) +{ + int i; + unsigned int hid; + struct hid_report *report; + + list_for_each_entry(report, &hdev->report_enum[HID_INPUT_REPORT].report_list, list) { + for (i = 0; i < report->maxfield; i++) { + hid = report->field[i]->usage->hid; + if ((hid & HID_USAGE_PAGE) == HID_UP_MSVENDOR && (hid & HID_USAGE) == 0xf) + return true; + } + } + + return false; +} + +static int apple_backlight_set(struct hid_device *hdev, u16 value, u16 rate) +{ + int ret = 0; + struct apple_backlight_set_report *rep; + + rep = kmalloc(sizeof(*rep), GFP_KERNEL); + if (rep == NULL) + return -ENOMEM; + + rep->report_id = 0xB0; + rep->version = 1; + rep->backlight = value; + rep->rate = rate; + + ret = hid_hw_raw_request(hdev, 0xB0u, (u8 *) rep, sizeof(*rep), + HID_OUTPUT_REPORT, HID_REQ_SET_REPORT); + + kfree(rep); + return ret; +} + +static int apple_backlight_led_set(struct led_classdev *led_cdev, + enum led_brightness brightness) +{ + struct apple_sc_backlight *backlight = container_of(led_cdev, + struct apple_sc_backlight, cdev); + + return apple_backlight_set(backlight->hdev, brightness, 0); +} + +static int apple_backlight_init(struct hid_device *hdev) +{ + int ret; + struct apple_sc *asc = hid_get_drvdata(hdev); + struct apple_backlight_config_report *rep; + + if (!apple_backlight_check_support(hdev)) + return -EINVAL; + + rep = kmalloc(0x200, GFP_KERNEL); + if (rep == NULL) + return -ENOMEM; + + ret = hid_hw_raw_request(hdev, 0xBFu, (u8 *) rep, sizeof(*rep), + HID_FEATURE_REPORT, HID_REQ_GET_REPORT); + if (ret < 0) { + hid_err(hdev, "backlight request failed: %d\n", ret); + goto cleanup_and_exit; + } + if (ret < 8 || rep->version != 1) { + hid_err(hdev, "backlight config struct: bad version %i\n", rep->version); + ret = -EINVAL; + goto cleanup_and_exit; + } + + hid_dbg(hdev, "backlight 
config: off=%u, on_min=%u, on_max=%u\n", + rep->backlight_off, rep->backlight_on_min, rep->backlight_on_max); + + asc->backlight = devm_kzalloc(&hdev->dev, sizeof(*asc->backlight), GFP_KERNEL); + if (!asc->backlight) { + ret = -ENOMEM; + goto cleanup_and_exit; + } + + asc->backlight->hdev = hdev; + asc->backlight->cdev.name = "apple::kbd_backlight"; + asc->backlight->cdev.max_brightness = rep->backlight_on_max; + asc->backlight->cdev.brightness_set_blocking = apple_backlight_led_set; + + ret = apple_backlight_set(hdev, 0, 0); + if (ret < 0) { + hid_err(hdev, "backlight set request failed: %d\n", ret); + goto cleanup_and_exit; + } + + ret = devm_led_classdev_register(&hdev->dev, &asc->backlight->cdev); + +cleanup_and_exit: + kfree(rep); + return ret; +} + static int apple_probe(struct hid_device *hdev, const struct hid_device_id *id) { @@ -565,6 +797,9 @@ static int apple_probe(struct hid_device *hdev, jiffies + msecs_to_jiffies(APPLE_BATTERY_TIMEOUT_MS)); apple_fetch_battery(hdev); + if (quirks & APPLE_BACKLIGHT_CTL) + apple_backlight_init(hdev); + return 0; } @@ -736,6 +971,22 @@ static const struct hid_device_id apple_devices[] = { .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_JIS), .driver_data = APPLE_HAS_FN | APPLE_RDESC_JIS }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K), + .driver_data = APPLE_HAS_FN | APPLE_BACKLIGHT_CTL }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132), + .driver_data = APPLE_HAS_FN | APPLE_BACKLIGHT_CTL }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680), + .driver_data = APPLE_HAS_FN | APPLE_BACKLIGHT_CTL }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213), + .driver_data = APPLE_HAS_FN | APPLE_BACKLIGHT_CTL }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K), + .driver_data = APPLE_HAS_FN }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223), + .driver_data = APPLE_HAS_FN }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K), + .driver_data = APPLE_HAS_FN }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F), + .driver_data = APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO), @@ -748,15 +999,15 @@ static const struct hid_device_id apple_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021), - .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021), - .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_FINGERPRINT_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, 
USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021), - .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, + .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK | APPLE_RDESC_BATTERY }, { HID_BLUETOOTH_DEVICE(BT_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_MAGIC_KEYBOARD_NUMPAD_2021), .driver_data = APPLE_HAS_FN | APPLE_ISO_TILDE_QUIRK }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 78bd3ddda442623e61651b6bea0987f05e79b680..6c2a36b16ed2df84b99df23a529f59f59f09ab00 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -167,6 +167,14 @@ #define USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI 0x0272 #define USB_DEVICE_ID_APPLE_WELLSPRING9_ISO 0x0273 #define USB_DEVICE_ID_APPLE_WELLSPRING9_JIS 0x0274 +#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K 0x027a +#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132 0x027b +#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680 0x027c +#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213 0x027d +#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K 0x027e +#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223 0x027f +#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K 0x0280 +#define USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F 0x0340 #define USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY 0x030a #define USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY 0x030b #define USB_DEVICE_ID_APPLE_IRCONTROL 0x8240 diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index c066ba901867bd56ba9bfef66132c638c2b1043a..dc67717d2dabc4f37e98f061f39bdc935f36db4c 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -295,6 +295,14 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ISO) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_JIS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS) }, @@ -930,6 +938,14 @@ static const struct hid_device_id hid_mouse_ignore_list[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ANSI) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_ISO) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING9_JIS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J140K) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J132) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J680) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J213) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J214K) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, 
USB_DEVICE_ID_APPLE_WELLSPRINGT2_J223) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J230K) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRINGT2_J152F) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_FOUNTAIN_TP_ONLY) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER1_TP_ONLY) }, { } diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index ca873a3b98dbe6c81023be39f98da71565af598d..f2d05bff424530d7ba28c6ac65e2874e0cc70907 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1660,6 +1660,13 @@ static int balloon_connect_vsp(struct hv_device *dev) unsigned long t; int ret; + /* + * max_pkt_size should be large enough for one vmbus packet header plus + * our receive buffer size. Hyper-V sends messages up to + * HV_HYP_PAGE_SIZE bytes long on the balloon channel. + */ + dev->channel->max_pkt_size = HV_HYP_PAGE_SIZE * 2; + ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0, balloon_onchannelcallback, dev); if (ret) diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c index eb2833d2b5d0a400685ffa60dcab4059b5d3202a..8328851986430fd94eb0e9632e9c9bf768564de7 100644 --- a/drivers/hv/hv_utils_transport.c +++ b/drivers/hv/hv_utils_transport.c @@ -13,7 +13,7 @@ #include "hv_utils_transport.h" static DEFINE_SPINLOCK(hvt_list_lock); -static struct list_head hvt_list = LIST_HEAD_INIT(hvt_list); +static LIST_HEAD(hvt_list); static void hvt_reset(struct hvutil_transport *hvt) { diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 17bf55fe316976e3e6eaa34d5fa6161e916f41c6..12a2b37e87f30518e57f404d26c255738f21f793 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2028,8 +2028,10 @@ int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel) kobj->kset = dev->channels_kset; ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL, "%u", relid); - if (ret) + if (ret) { + kobject_put(kobj); return ret; + } ret = sysfs_create_group(kobj, &vmbus_chan_group); @@ -2038,6 +2040,7 @@ int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel) * The calling functions' error handling paths will clean up the * empty channel directory.
*/ + kobject_put(kobj); dev_err(device, "Unable to set up channel sysfs files\n"); return ret; } @@ -2079,7 +2082,6 @@ struct hv_device *vmbus_device_create(const guid_t *type, return child_device_obj; } -static u64 vmbus_dma_mask = DMA_BIT_MASK(64); /* * vmbus_device_register - Register the child device */ @@ -2120,8 +2122,9 @@ int vmbus_device_register(struct hv_device *child_device_obj) } hv_debug_add_dev_dir(child_device_obj); - child_device_obj->device.dma_mask = &vmbus_dma_mask; child_device_obj->device.dma_parms = &child_device_obj->dma_parms; + child_device_obj->device.dma_mask = &child_device_obj->dma_mask; + dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64)); return 0; err_kset_unregister: diff --git a/drivers/hwmon/adt7470.c b/drivers/hwmon/adt7470.c index d519aca4a9d645a732e1f39f70fc9d9c07c6aafc..fb6d14d213a18c8595e8111f6c0c72890e1c0d61 100644 --- a/drivers/hwmon/adt7470.c +++ b/drivers/hwmon/adt7470.c @@ -662,6 +662,9 @@ static int adt7470_fan_write(struct device *dev, u32 attr, int channel, long val struct adt7470_data *data = dev_get_drvdata(dev); int err; + if (val <= 0) + return -EINVAL; + val = FAN_RPM_TO_PERIOD(val); val = clamp_val(val, 1, 65534); diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index d401f9acf45065348a8087eaf7544096e1214b30..9949eeb79378ed5da9d79424e7f125ae8e909a9d 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -451,7 +451,7 @@ static int i8k_get_power_status(void) static long i8k_ioctl(struct file *fp, unsigned int cmd, unsigned long arg) { - struct dell_smm_data *data = PDE_DATA(file_inode(fp)); + struct dell_smm_data *data = pde_data(file_inode(fp)); int __user *argp = (int __user *)arg; int speed, err; int val = 0; @@ -585,7 +585,7 @@ static int i8k_proc_show(struct seq_file *seq, void *offset) static int i8k_open_fs(struct inode *inode, struct file *file) { - return single_open(file, i8k_proc_show, PDE_DATA(inode)); + return single_open(file, i8k_proc_show, pde_data(inode)); } static const struct proc_ops i8k_proc_ops = { diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 74019dff2550efef903814d3ffaaaee4e168ad91..1c9493c7081324a6b7781e2ff398332ad15306b4 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -373,7 +373,7 @@ static const struct lm90_params lm90_params[] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, - .max_convrate = 8, + .max_convrate = 7, }, [lm86] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT @@ -394,12 +394,13 @@ static const struct lm90_params lm90_params[] = { .max_convrate = 9, }, [max6646] = { - .flags = LM90_HAVE_CRIT, + .flags = LM90_HAVE_CRIT | LM90_HAVE_BROKEN_ALERT, .alert_alarms = 0x7c, .max_convrate = 6, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6654] = { + .flags = LM90_HAVE_BROKEN_ALERT, .alert_alarms = 0x7c, .max_convrate = 7, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, @@ -418,7 +419,7 @@ static const struct lm90_params lm90_params[] = { }, [max6680] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT - | LM90_HAVE_CRIT_ALRM_SWP, + | LM90_HAVE_CRIT_ALRM_SWP | LM90_HAVE_BROKEN_ALERT, .alert_alarms = 0x7c, .max_convrate = 7, }, @@ -848,7 +849,7 @@ static int lm90_update_device(struct device *dev) * Re-enable ALERT# output if it was originally enabled and * relevant alarms are all clear */ - if (!(data->config_orig & 0x80) && + if ((client->irq || !(data->config_orig & 0x80)) && !(data->alarms & data->alert_alarms)) { if (data->config & 
0x80) { dev_dbg(&client->dev, "Re-enabling ALERT#\n"); @@ -1807,22 +1808,22 @@ static bool lm90_is_tripped(struct i2c_client *client, u16 *status) if (st & LM90_STATUS_LLOW) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_min, 0); + hwmon_temp_min_alarm, 0); if (st & LM90_STATUS_RLOW) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_min, 1); + hwmon_temp_min_alarm, 1); if (st2 & MAX6696_STATUS2_R2LOW) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_min, 2); + hwmon_temp_min_alarm, 2); if (st & LM90_STATUS_LHIGH) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_max, 0); + hwmon_temp_max_alarm, 0); if (st & LM90_STATUS_RHIGH) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_max, 1); + hwmon_temp_max_alarm, 1); if (st2 & MAX6696_STATUS2_R2HIGH) hwmon_notify_event(data->hwmon_dev, hwmon_temp, - hwmon_temp_max, 2); + hwmon_temp_max_alarm, 2); return true; } diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c index 2a4bed0ab226bdfd76d80b8ebe1a43bfb8a5e71d..7352d2b3c756d7c9820f972ac5444430c6824af7 100644 --- a/drivers/hwmon/ltc2992.c +++ b/drivers/hwmon/ltc2992.c @@ -248,8 +248,7 @@ static int ltc2992_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask gpio_status = reg; - gpio_nr = 0; - for_each_set_bit_from(gpio_nr, mask, LTC2992_GPIO_NR) { + for_each_set_bit(gpio_nr, mask, LTC2992_GPIO_NR) { if (test_bit(LTC2992_GPIO_BIT(gpio_nr), &gpio_status)) set_bit(gpio_nr, bits); } diff --git a/drivers/hwmon/nct6775.c b/drivers/hwmon/nct6775.c index fd3f91cb01c6adf5ba9677582ea298d8377037c8..098d12b9ecdad7e621e87cb82f434a2c3916a906 100644 --- a/drivers/hwmon/nct6775.c +++ b/drivers/hwmon/nct6775.c @@ -1175,7 +1175,7 @@ static inline u8 in_to_reg(u32 val, u8 nr) struct nct6775_data { int addr; /* IO base of hw monitor block */ - int sioreg; /* SIO register address */ + struct nct6775_sio_data *sio_data; enum kinds kind; const char *name; @@ -3559,7 +3559,7 @@ clear_caseopen(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct nct6775_data *data = dev_get_drvdata(dev); - struct nct6775_sio_data *sio_data = dev_get_platdata(dev); + struct nct6775_sio_data *sio_data = data->sio_data; int nr = to_sensor_dev_attr(attr)->index - INTRUSION_ALARM_BASE; unsigned long val; u8 reg; @@ -3967,7 +3967,7 @@ static int nct6775_probe(struct platform_device *pdev) return -ENOMEM; data->kind = sio_data->kind; - data->sioreg = sio_data->sioreg; + data->sio_data = sio_data; if (sio_data->access == access_direct) { data->addr = res->start; diff --git a/drivers/hwmon/pmbus/ir38064.c b/drivers/hwmon/pmbus/ir38064.c index 0ea7e1c18bdc7fecfff3ffce4a01b29b2db16203..09276e397194524b04ff8cf668cf45701dc44234 100644 --- a/drivers/hwmon/pmbus/ir38064.c +++ b/drivers/hwmon/pmbus/ir38064.c @@ -62,7 +62,7 @@ static const struct i2c_device_id ir38064_id[] = { MODULE_DEVICE_TABLE(i2c, ir38064_id); -static const struct of_device_id ir38064_of_match[] = { +static const struct of_device_id __maybe_unused ir38064_of_match[] = { { .compatible = "infineon,ir38060" }, { .compatible = "infineon,ir38064" }, { .compatible = "infineon,ir38164" }, diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index e45c600fccc0b24670f65344870da54eaab9eae2..bc2cfa5f9592242ab02541d8f2b11eebc53a18df 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -347,7 +347,7 @@ static int ad7124_find_free_config_slot(struct ad7124_state *st) { unsigned int free_cfg_slot; - free_cfg_slot = 
find_next_zero_bit(&st->cfg_slots_status, AD7124_MAX_CONFIGS, 0); + free_cfg_slot = find_first_zero_bit(&st->cfg_slots_status, AD7124_MAX_CONFIGS); if (free_cfg_slot == AD7124_MAX_CONFIGS) return -1; diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index 94eb9f6cf1286a3f23a45f4c6b046bba319212fb..208b5193c621096ba4bf4a369270a613cb6bf04a 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -1569,9 +1569,17 @@ static long iio_device_buffer_getfd(struct iio_dev *indio_dev, unsigned long arg } if (copy_to_user(ival, &fd, sizeof(fd))) { - put_unused_fd(fd); - ret = -EFAULT; - goto error_free_ib; + /* + * "Leak" the fd, as there's not much we can do about this + * anyway. 'fd' might have been closed already, as + * anon_inode_getfd() called fd_install() on it, which made + * it reachable by userland. + * + * Instead of allowing a malicious user to play tricks with + * us, rely on the process exit path to do any necessary + * cleanup, as in releasing the file, if still needed. + */ + return -EFAULT; } return 0; diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index c903b74f46a4683b53d50e9b07596da13e4cdd71..35f0d5e7533d60b6038d033095053bdf18e61aa7 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3322,7 +3322,7 @@ static int cm_lap_handler(struct cm_work *work) ret = cm_init_av_by_path(param->alternate_path, NULL, &alt_av); if (ret) { rdma_destroy_ah_attr(&ah_attr); - return -EINVAL; + goto deref; } spin_lock_irq(&cm_id_priv->lock); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 27a00ce2e10120be29a5081b9226d7747dba23a1..c447526288f49e6614380d2635650d16c8621ece 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -67,8 +67,8 @@ static const char * const cma_events[] = { [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", }; -static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr, - union ib_gid *mgid); +static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, + enum ib_gid_type gid_type); const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) { @@ -1846,17 +1846,19 @@ static void destroy_mc(struct rdma_id_private *id_priv, if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); - if (ndev) { + if (ndev && !send_only) { + enum ib_gid_type gid_type; union ib_gid mgid; - cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr, - &mgid); - - if (!send_only) - cma_igmp_send(ndev, &mgid, false); - - dev_put(ndev); + gid_type = id_priv->cma_dev->default_gid_type + [id_priv->id.port_num - + rdma_start_port( + id_priv->cma_dev->device)]; + cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid, + gid_type); + cma_igmp_send(ndev, &mgid, false); } + dev_put(ndev); cancel_work_sync(&mc->iboe_join.work); } diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 2b72c4fa955066052754e7112d098747320d640f..9d6ac9dff39a28bf865558420d2bb1e106bc52e6 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -95,6 +95,7 @@ struct ucma_context { u64 uid; struct list_head list; + struct list_head mc_list; struct work_struct close_work; }; @@ -105,6 +106,7 @@ struct ucma_multicast { u64 uid; u8 join_state; + struct list_head list; struct sockaddr_storage addr; }; @@ -198,6 +200,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) INIT_WORK(&ctx->close_work, ucma_close_id); 
init_completion(&ctx->comp); + INIT_LIST_HEAD(&ctx->mc_list); /* So list_del() will work if we don't do ucma_finish_ctx() */ INIT_LIST_HEAD(&ctx->list); ctx->file = file; @@ -484,19 +487,19 @@ err1: static void ucma_cleanup_multicast(struct ucma_context *ctx) { - struct ucma_multicast *mc; - unsigned long index; + struct ucma_multicast *mc, *tmp; - xa_for_each(&multicast_table, index, mc) { - if (mc->ctx != ctx) - continue; + xa_lock(&multicast_table); + list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) { + list_del(&mc->list); /* * At this point mc->ctx->ref is 0 so the mc cannot leave the * lock on the reader and this is enough serialization */ - xa_erase(&multicast_table, index); + __xa_erase(&multicast_table, mc->id); kfree(mc); } + xa_unlock(&multicast_table); } static void ucma_cleanup_mc_events(struct ucma_multicast *mc) @@ -1469,12 +1472,16 @@ static ssize_t ucma_process_join(struct ucma_file *file, mc->uid = cmd->uid; memcpy(&mc->addr, addr, cmd->addr_size); - if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, + xa_lock(&multicast_table); + if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL)) { ret = -ENOMEM; goto err_free_mc; } + list_add_tail(&mc->list, &ctx->mc_list); + xa_unlock(&multicast_table); + mutex_lock(&ctx->mutex); ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, join_state, mc); @@ -1500,8 +1507,11 @@ err_leave_multicast: mutex_unlock(&ctx->mutex); ucma_cleanup_mc_events(mc); err_xa_erase: - xa_erase(&multicast_table, mc->id); + xa_lock(&multicast_table); + list_del(&mc->list); + __xa_erase(&multicast_table, mc->id); err_free_mc: + xa_unlock(&multicast_table); kfree(mc); err_put_ctx: ucma_put_ctx(ctx); @@ -1569,15 +1579,17 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, mc = ERR_PTR(-EINVAL); else if (!refcount_inc_not_zero(&mc->ctx->ref)) mc = ERR_PTR(-ENXIO); - else - __xa_erase(&multicast_table, mc->id); - xa_unlock(&multicast_table); if (IS_ERR(mc)) { + xa_unlock(&multicast_table); ret = PTR_ERR(mc); goto out; } + list_del(&mc->list); + __xa_erase(&multicast_table, mc->id); + xa_unlock(&multicast_table); + mutex_lock(&mc->ctx->mutex); rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); mutex_unlock(&mc->ctx->mutex); diff --git a/drivers/infiniband/hw/hfi1/ipoib.h b/drivers/infiniband/hw/hfi1/ipoib.h index 90912293424648470b74e5b269d2bf6527c38906..aec60d4888eb0db1dc58ae5844dee4f630f92099 100644 --- a/drivers/infiniband/hw/hfi1/ipoib.h +++ b/drivers/infiniband/hw/hfi1/ipoib.h @@ -55,7 +55,7 @@ union hfi1_ipoib_flow { */ struct ipoib_txreq { struct sdma_txreq txreq; - struct hfi1_sdma_header sdma_hdr; + struct hfi1_sdma_header *sdma_hdr; int sdma_status; int complete; struct hfi1_ipoib_dev_priv *priv; diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c index e1a2b02bbd91bea1323f006436639e1ed5cfa2d6..5d814afdf7f39a2f62eb65853a94dd97914fc113 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_main.c +++ b/drivers/infiniband/hw/hfi1/ipoib_main.c @@ -22,26 +22,35 @@ static int hfi1_ipoib_dev_init(struct net_device *dev) int ret; dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; ret = priv->netdev_ops->ndo_init(dev); if (ret) - return ret; + goto out_ret; ret = hfi1_netdev_add_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr), dev); if (ret < 0) { priv->netdev_ops->ndo_uninit(dev); - return ret; + goto out_ret; } return 0; +out_ret: + free_percpu(dev->tstats); + dev->tstats = NULL; + return ret; 
} static void hfi1_ipoib_dev_uninit(struct net_device *dev) { struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); + free_percpu(dev->tstats); + dev->tstats = NULL; + hfi1_netdev_remove_data(priv->dd, qpn_from_mac(priv->netdev->dev_addr)); priv->netdev_ops->ndo_uninit(dev); @@ -166,12 +175,7 @@ static void hfi1_ipoib_netdev_dtor(struct net_device *dev) hfi1_ipoib_rxq_deinit(priv->netdev); free_percpu(dev->tstats); -} - -static void hfi1_ipoib_free_rdma_netdev(struct net_device *dev) -{ - hfi1_ipoib_netdev_dtor(dev); - free_netdev(dev); + dev->tstats = NULL; } static void hfi1_ipoib_set_id(struct net_device *dev, int id) @@ -211,24 +215,23 @@ static int hfi1_ipoib_setup_rn(struct ib_device *device, priv->port_num = port_num; priv->netdev_ops = netdev->netdev_ops; - netdev->netdev_ops = &hfi1_ipoib_netdev_ops; - ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey); rc = hfi1_ipoib_txreq_init(priv); if (rc) { dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc); - hfi1_ipoib_free_rdma_netdev(netdev); return rc; } rc = hfi1_ipoib_rxq_init(netdev); if (rc) { dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc); - hfi1_ipoib_free_rdma_netdev(netdev); + hfi1_ipoib_txreq_deinit(priv); return rc; } + netdev->netdev_ops = &hfi1_ipoib_netdev_ops; + netdev->priv_destructor = hfi1_ipoib_netdev_dtor; netdev->needs_free_netdev = true; diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c index f4010890309f7a191f2933ba53e994098437a8f3..d6bbdb8fcb50a2945fa010c6bb338fad18fb4df1 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_tx.c +++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c @@ -122,7 +122,7 @@ static void hfi1_ipoib_free_tx(struct ipoib_txreq *tx, int budget) dd_dev_warn(priv->dd, "%s: Status = 0x%x pbc 0x%llx txq = %d sde = %d\n", __func__, tx->sdma_status, - le64_to_cpu(tx->sdma_hdr.pbc), tx->txq->q_idx, + le64_to_cpu(tx->sdma_hdr->pbc), tx->txq->q_idx, tx->txq->sde->this_idx); } @@ -231,7 +231,7 @@ static int hfi1_ipoib_build_tx_desc(struct ipoib_txreq *tx, { struct hfi1_devdata *dd = txp->dd; struct sdma_txreq *txreq = &tx->txreq; - struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr; + struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr; u16 pkt_bytes = sizeof(sdma_hdr->pbc) + (txp->hdr_dwords << 2) + tx->skb->len; int ret; @@ -256,7 +256,7 @@ static void hfi1_ipoib_build_ib_tx_headers(struct ipoib_txreq *tx, struct ipoib_txparms *txp) { struct hfi1_ipoib_dev_priv *priv = tx->txq->priv; - struct hfi1_sdma_header *sdma_hdr = &tx->sdma_hdr; + struct hfi1_sdma_header *sdma_hdr = tx->sdma_hdr; struct sk_buff *skb = tx->skb; struct hfi1_pportdata *ppd = ppd_from_ibp(txp->ibp); struct rdma_ah_attr *ah_attr = txp->ah_attr; @@ -483,7 +483,7 @@ static int hfi1_ipoib_send_dma_single(struct net_device *dev, if (likely(!ret)) { tx_ok: trace_sdma_output_ibhdr(txq->priv->dd, - &tx->sdma_hdr.hdr, + &tx->sdma_hdr->hdr, ib_is_sc5(txp->flow.sc5)); hfi1_ipoib_check_queue_depth(txq); return NETDEV_TX_OK; @@ -547,7 +547,7 @@ static int hfi1_ipoib_send_dma_list(struct net_device *dev, hfi1_ipoib_check_queue_depth(txq); trace_sdma_output_ibhdr(txq->priv->dd, - &tx->sdma_hdr.hdr, + &tx->sdma_hdr->hdr, ib_is_sc5(txp->flow.sc5)); if (!netdev_xmit_more()) @@ -683,7 +683,8 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) { struct net_device *dev = priv->netdev; u32 tx_ring_size, tx_item_size; - int i; + struct hfi1_ipoib_circ_buf *tx_ring; + int i, j; /* * Ring holds 1 less than tx_ring_size @@ -701,7 +702,9 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv 
*priv) for (i = 0; i < dev->num_tx_queues; i++) { struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + struct ipoib_txreq *tx; + tx_ring = &txq->tx_ring; iowait_init(&txq->wait, 0, hfi1_ipoib_flush_txq, @@ -725,14 +728,19 @@ int hfi1_ipoib_txreq_init(struct hfi1_ipoib_dev_priv *priv) priv->dd->node); txq->tx_ring.items = - kcalloc_node(tx_ring_size, tx_item_size, - GFP_KERNEL, priv->dd->node); + kvzalloc_node(array_size(tx_ring_size, tx_item_size), + GFP_KERNEL, priv->dd->node); if (!txq->tx_ring.items) goto free_txqs; txq->tx_ring.max_items = tx_ring_size; - txq->tx_ring.shift = ilog2(tx_ring_size); + txq->tx_ring.shift = ilog2(tx_item_size); txq->tx_ring.avail = hfi1_ipoib_ring_hwat(txq); + tx_ring = &txq->tx_ring; + for (j = 0; j < tx_ring_size; j++) + hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr = + kzalloc_node(sizeof(*tx->sdma_hdr), + GFP_KERNEL, priv->dd->node); netif_tx_napi_add(dev, &txq->napi, hfi1_ipoib_poll_tx_ring, @@ -746,7 +754,10 @@ free_txqs: struct hfi1_ipoib_txq *txq = &priv->txqs[i]; netif_napi_del(&txq->napi); - kfree(txq->tx_ring.items); + tx_ring = &txq->tx_ring; + for (j = 0; j < tx_ring_size; j++) + kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr); + kvfree(tx_ring->items); } kfree(priv->txqs); @@ -780,17 +791,20 @@ static void hfi1_ipoib_drain_tx_list(struct hfi1_ipoib_txq *txq) void hfi1_ipoib_txreq_deinit(struct hfi1_ipoib_dev_priv *priv) { - int i; + int i, j; for (i = 0; i < priv->netdev->num_tx_queues; i++) { struct hfi1_ipoib_txq *txq = &priv->txqs[i]; + struct hfi1_ipoib_circ_buf *tx_ring = &txq->tx_ring; iowait_cancel_work(&txq->wait); iowait_sdma_drain(&txq->wait); hfi1_ipoib_drain_tx_list(txq); netif_napi_del(&txq->napi); hfi1_ipoib_drain_tx_ring(txq); - kfree(txq->tx_ring.items); + for (j = 0; j < tx_ring->max_items; j++) + kfree(hfi1_txreq_from_idx(tx_ring, j)->sdma_hdr); + kvfree(tx_ring->items); } kfree(priv->txqs); diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 4b1b16e7a75b076fba5da4a7c9c8499fb089a1a1..89234d04cc652cc48784a2af3161db04655cf6c3 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -1709,14 +1709,14 @@ clean_msixtbl: */ static void irdma_get_used_rsrc(struct irdma_device *iwdev) { - iwdev->rf->used_pds = find_next_zero_bit(iwdev->rf->allocated_pds, - iwdev->rf->max_pd, 0); - iwdev->rf->used_qps = find_next_zero_bit(iwdev->rf->allocated_qps, - iwdev->rf->max_qp, 0); - iwdev->rf->used_cqs = find_next_zero_bit(iwdev->rf->allocated_cqs, - iwdev->rf->max_cq, 0); - iwdev->rf->used_mrs = find_next_zero_bit(iwdev->rf->allocated_mrs, - iwdev->rf->max_mr, 0); + iwdev->rf->used_pds = find_first_zero_bit(iwdev->rf->allocated_pds, + iwdev->rf->max_pd); + iwdev->rf->used_qps = find_first_zero_bit(iwdev->rf->allocated_qps, + iwdev->rf->max_qp); + iwdev->rf->used_cqs = find_first_zero_bit(iwdev->rf->allocated_cqs, + iwdev->rf->max_cq); + iwdev->rf->used_mrs = find_first_zero_bit(iwdev->rf->allocated_mrs, + iwdev->rf->max_mr); } void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1c3d9722998877055b4dce747d3f604e3321be0b..93b1650eacfab0e3d42f0358513ce706386d7c04 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -3237,7 +3237,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: ew = kmalloc(sizeof *ew, GFP_ATOMIC); if (!ew) - break; + return; INIT_WORK(&ew->work, handle_port_mgmt_change_event); 
memcpy(&ew->ib_eqe, eqe, sizeof *eqe); diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index 3305f2744bfaaddf990979028e9659b680791e81..ae50b56e891321fb8bf98b7c2b28bd626b5169c5 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -3073,6 +3073,8 @@ do_write: case IB_WR_ATOMIC_FETCH_AND_ADD: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto inv_err; + if (unlikely(wqe->atomic_wr.remote_addr & (sizeof(u64) - 1))) + goto inv_err; if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), wqe->atomic_wr.remote_addr, wqe->atomic_wr.rkey, diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 368959ae9a8cc382bb0bf89f66bce731ddbf3f24..df03d84c6868ace3d5fe68c47e62cdd33e8b44d2 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -644,14 +644,9 @@ static inline struct siw_sqe *orq_get_current(struct siw_qp *qp) return &qp->orq[qp->orq_get % qp->attrs.orq_size]; } -static inline struct siw_sqe *orq_get_tail(struct siw_qp *qp) -{ - return &qp->orq[qp->orq_put % qp->attrs.orq_size]; -} - static inline struct siw_sqe *orq_get_free(struct siw_qp *qp) { - struct siw_sqe *orq_e = orq_get_tail(qp); + struct siw_sqe *orq_e = &qp->orq[qp->orq_put % qp->attrs.orq_size]; if (READ_ONCE(orq_e->flags) == 0) return orq_e; diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index 60116f20653c776c3de38cbb3c954d9bdffbb112..875ea6f1b04a29034b6b41f8bfedaefc01af28c6 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -1153,11 +1153,12 @@ static int siw_check_tx_fence(struct siw_qp *qp) spin_lock_irqsave(&qp->orq_lock, flags); - rreq = orq_get_current(qp); - /* free current orq entry */ + rreq = orq_get_current(qp); WRITE_ONCE(rreq->flags, 0); + qp->orq_get++; + if (qp->tx_ctx.orq_fence) { if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) { pr_warn("siw: [QP %u]: fence resume: bad status %d\n", @@ -1165,10 +1166,12 @@ static int siw_check_tx_fence(struct siw_qp *qp) rv = -EPROTO; goto out; } - /* resume SQ processing */ + /* resume SQ processing, if possible */ if (tx_waiting->sqe.opcode == SIW_OP_READ || tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) { - rreq = orq_get_tail(qp); + + /* SQ processing was stopped because of a full ORQ */ + rreq = orq_get_free(qp); if (unlikely(!rreq)) { pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp)); rv = -EPROTO; @@ -1181,15 +1184,14 @@ static int siw_check_tx_fence(struct siw_qp *qp) resume_tx = 1; } else if (siw_orq_empty(qp)) { + /* + * SQ processing was stopped by a fenced work request. + * Resume since all previous Reads are now completed.
+ */ qp->tx_ctx.orq_fence = 0; resume_tx = 1; - } else { - pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n", - qp_id(qp), qp->orq_get, qp->orq_put); - rv = -EPROTO; } } - qp->orq_get++; out: spin_unlock_irqrestore(&qp->orq_lock, flags); diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index a3dd2cb6d5c989cc3297b47997deecff27ff92ca..54ef367b074abad51d96686d5e423974f9c6830f 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -313,7 +313,8 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, if (atomic_inc_return(&sdev->num_qp) > SIW_MAX_QP) { siw_dbg(base_dev, "too many QP's\n"); - return -ENOMEM; + rv = -ENOMEM; + goto err_atomic; } if (attrs->qp_type != IB_QPT_RC) { siw_dbg(base_dev, "only RC QP's supported\n"); diff --git a/drivers/input/touchscreen/wm97xx-core.c b/drivers/input/touchscreen/wm97xx-core.c index 78d2ee99f37ac7aee418a50180fc96996192b351..1b58611c808406da9c5a22f070491adedb55b9cb 100644 --- a/drivers/input/touchscreen/wm97xx-core.c +++ b/drivers/input/touchscreen/wm97xx-core.c @@ -615,10 +615,9 @@ static int wm97xx_register_touch(struct wm97xx *wm) * extensions) */ wm->touch_dev = platform_device_alloc("wm97xx-touch", -1); - if (!wm->touch_dev) { - ret = -ENOMEM; - goto touch_err; - } + if (!wm->touch_dev) + return -ENOMEM; + platform_set_drvdata(wm->touch_dev, wm); wm->touch_dev->dev.parent = wm->dev; wm->touch_dev->dev.platform_data = pdata; @@ -629,9 +628,6 @@ static int wm97xx_register_touch(struct wm97xx *wm) return 0; touch_reg_err: platform_device_put(wm->touch_dev); -touch_err: - input_unregister_device(wm->input_dev); - wm->input_dev = NULL; return ret; } @@ -639,8 +635,6 @@ touch_err: static void wm97xx_unregister_touch(struct wm97xx *wm) { platform_device_unregister(wm->touch_dev); - input_unregister_device(wm->input_dev); - wm->input_dev = NULL; } static int _wm97xx_probe(struct wm97xx *wm) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index dc338acf33385b0c4bf59cf0979b97598124f7df..b10fb52ea44288747e0d992385a7b656541d92b7 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -834,6 +835,7 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (status & (MMIO_STATUS_GALOG_RUN_MASK)) break; + udelay(10); } if (WARN_ON(i >= LOOP_TIMEOUT)) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index f912fe45bea2c005d1073226b175821b6cca99b7..a673195978843e3c0b6f3db2024f4a959c574ceb 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -569,9 +569,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) fn, &intel_ir_domain_ops, iommu); if (!iommu->ir_domain) { - irq_domain_free_fwnode(fn); pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id); - goto out_free_bitmap; + goto out_free_fwnode; } iommu->ir_msi_domain = arch_create_remap_msi_irq_domain(iommu->ir_domain, @@ -595,7 +594,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) if (dmar_enable_qi(iommu)) { pr_err("Failed to enable queued invalidation\n"); - goto out_free_bitmap; + goto out_free_ir_domain; } } @@ -619,6 +618,14 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) return 0; +out_free_ir_domain: + if (iommu->ir_msi_domain) + irq_domain_remove(iommu->ir_msi_domain); + iommu->ir_msi_domain = 
NULL; + irq_domain_remove(iommu->ir_domain); + iommu->ir_domain = NULL; +out_free_fwnode: + irq_domain_free_fwnode(fn); out_free_bitmap: bitmap_free(bitmap); out_free_pages: diff --git a/drivers/iommu/ioasid.c b/drivers/iommu/ioasid.c index 50ee27bbd04ec288341df8d8401a02336df9c21c..06fee7416816be4febe052ade0114bb6da1078e5 100644 --- a/drivers/iommu/ioasid.c +++ b/drivers/iommu/ioasid.c @@ -349,6 +349,7 @@ EXPORT_SYMBOL_GPL(ioasid_alloc); /** * ioasid_get - obtain a reference to the IOASID + * @ioasid: the ID to get */ void ioasid_get(ioasid_t ioasid) { diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 8b86406b71627c323bac525084e265ae73dd6d83..107dcf5938d6972168a7e89a772ab6a068158844 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -207,9 +207,14 @@ static struct dev_iommu *dev_iommu_get(struct device *dev) static void dev_iommu_free(struct device *dev) { - iommu_fwspec_free(dev); - kfree(dev->iommu); + struct dev_iommu *param = dev->iommu; + dev->iommu = NULL; + if (param->fwspec) { + fwnode_handle_put(param->fwspec->iommu_fwnode); + kfree(param->fwspec); + } + kfree(param); } static int __iommu_probe_device(struct device *dev, struct list_head *group_list) @@ -980,17 +985,6 @@ static int iommu_group_device_count(struct iommu_group *group) return ret; } -/** - * iommu_group_for_each_dev - iterate over each device in the group - * @group: the group - * @data: caller opaque data to be passed to callback function - * @fn: caller supplied callback function - * - * This function is called by group users to iterate over group devices. - * Callers should hold a reference count to the group during callback. - * The group->mutex is held across callbacks, which will block calls to - * iommu_group_add/remove_device. - */ static int __iommu_group_for_each_dev(struct iommu_group *group, void *data, int (*fn)(struct device *, void *)) { @@ -1005,7 +999,17 @@ static int __iommu_group_for_each_dev(struct iommu_group *group, void *data, return ret; } - +/** + * iommu_group_for_each_dev - iterate over each device in the group + * @group: the group + * @data: caller opaque data to be passed to callback function + * @fn: caller supplied callback function + * + * This function is called by group users to iterate over group devices. + * Callers should hold a reference count to the group during callback. + * The group->mutex is held across callbacks, which will block calls to + * iommu_group_add/remove_device. + */ int iommu_group_for_each_dev(struct iommu_group *group, void *data, int (*fn)(struct device *, void *)) { @@ -3032,6 +3036,7 @@ EXPORT_SYMBOL_GPL(iommu_aux_get_pasid); * iommu_sva_bind_device() - Bind a process address space to a device * @dev: the device * @mm: the mm to bind, caller must hold a reference to it + * @drvdata: opaque data pointer to pass to bind callback * * Create a bond between device and address space, allowing the device to access * the mm using the returned PASID. 
If a bond already exists between @device and diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 91749654fd4909d0bfc1e7536908439eeefa7d60..980e4af3f06b6eb338f3a5f9f6859f68a3dc286f 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1085,7 +1085,7 @@ static __maybe_unused int omap_iommu_runtime_resume(struct device *dev) } /** - * omap_iommu_suspend_prepare - prepare() dev_pm_ops implementation + * omap_iommu_prepare - prepare() dev_pm_ops implementation * @dev: iommu device * * This function performs the necessary checks to determine if the IOMMU diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index 2543ef65825b903438f16331de4c92f864fe125e..38091ebb940330675c306df56c1e5adc4a893682 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -178,7 +178,6 @@ struct aic_irq_chip { struct irq_domain *hw_domain; struct irq_domain *ipi_domain; int nr_hw; - int ipi_hwirq; }; static DEFINE_PER_CPU(uint32_t, aic_fiq_unmasked); diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index d25b7a864bbbbb5f55bd6363ec2b80a29ce6d196..cd772973114afab89e9afe734fa91053eaa2121b 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -4856,6 +4856,38 @@ static struct syscore_ops its_syscore_ops = { .resume = its_restore_enable, }; +static void __init __iomem *its_map_one(struct resource *res, int *err) +{ + void __iomem *its_base; + u32 val; + + its_base = ioremap(res->start, SZ_64K); + if (!its_base) { + pr_warn("ITS@%pa: Unable to map ITS registers\n", &res->start); + *err = -ENOMEM; + return NULL; + } + + val = readl_relaxed(its_base + GITS_PIDR2) & GIC_PIDR2_ARCH_MASK; + if (val != 0x30 && val != 0x40) { + pr_warn("ITS@%pa: No ITS detected, giving up\n", &res->start); + *err = -ENODEV; + goto out_unmap; + } + + *err = its_force_quiescent(its_base); + if (*err) { + pr_warn("ITS@%pa: Failed to quiesce, giving up\n", &res->start); + goto out_unmap; + } + + return its_base; + +out_unmap: + iounmap(its_base); + return NULL; +} + static int its_init_domain(struct fwnode_handle *handle, struct its_node *its) { struct irq_domain *inner_domain; @@ -4963,29 +4995,14 @@ static int __init its_probe_one(struct resource *res, { struct its_node *its; void __iomem *its_base; - u32 val, ctlr; u64 baser, tmp, typer; struct page *page; + u32 ctlr; int err; - its_base = ioremap(res->start, SZ_64K); - if (!its_base) { - pr_warn("ITS@%pa: Unable to map ITS registers\n", &res->start); - return -ENOMEM; - } - - val = readl_relaxed(its_base + GITS_PIDR2) & GIC_PIDR2_ARCH_MASK; - if (val != 0x30 && val != 0x40) { - pr_warn("ITS@%pa: No ITS detected, giving up\n", &res->start); - err = -ENODEV; - goto out_unmap; - } - - err = its_force_quiescent(its_base); - if (err) { - pr_warn("ITS@%pa: Failed to quiesce, giving up\n", &res->start); - goto out_unmap; - } + its_base = its_map_one(res, &err); + if (!its_base) + return err; pr_info("ITS %pR\n", res); @@ -5241,13 +5258,31 @@ static int its_cpu_memreserve_lpi(unsigned int cpu) out: /* Last CPU being brought up gets to issue the cleanup */ - if (cpumask_equal(&cpus_booted_once_mask, cpu_possible_mask)) + if (!IS_ENABLED(CONFIG_SMP) || + cpumask_equal(&cpus_booted_once_mask, cpu_possible_mask)) schedule_work(&rdist_memreserve_cpuhp_cleanup_work); gic_data_rdist()->flags |= RD_LOCAL_MEMRESERVE_DONE; return ret; } +/* Mark all the BASER registers as invalid before they get reprogrammed */ +static int __init its_reset_one(struct 
resource *res) +{ + void __iomem *its_base; + int err, i; + + its_base = its_map_one(res, &err); + if (!its_base) + return err; + + for (i = 0; i < GITS_BASER_NR_REGS; i++) + gits_write_baser(0, its_base + GITS_BASER + (i << 3)); + + iounmap(its_base); + return 0; +} + static const struct of_device_id its_device_id[] = { { .compatible = "arm,gic-v3-its", }, {}, @@ -5258,6 +5293,26 @@ static int __init its_of_probe(struct device_node *node) struct device_node *np; struct resource res; + /* + * Make sure *all* the ITS are reset before we probe any, as + * they may be sharing memory. If any of the ITS fails to + * reset, don't even try to go any further, as this could + * result in something even worse. + */ + for (np = of_find_matching_node(node, its_device_id); np; + np = of_find_matching_node(np, its_device_id)) { + int err; + + if (!of_device_is_available(np) || + !of_property_read_bool(np, "msi-controller") || + of_address_to_resource(np, 0, &res)) + continue; + + err = its_reset_one(&res); + if (err) + return err; + } + for (np = of_find_matching_node(node, its_device_id); np; np = of_find_matching_node(np, its_device_id)) { if (!of_device_is_available(np)) @@ -5420,11 +5475,35 @@ dom_err: return err; } +static int __init its_acpi_reset(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_madt_generic_translator *its_entry; + struct resource res; + + its_entry = (struct acpi_madt_generic_translator *)header; + res = (struct resource) { + .start = its_entry->base_address, + .end = its_entry->base_address + ACPI_GICV3_ITS_MEM_SIZE - 1, + .flags = IORESOURCE_MEM, + }; + + return its_reset_one(&res); +} + static void __init its_acpi_probe(void) { acpi_table_parse_srat_its(); - acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR, - gic_acpi_parse_madt_its, 0); + /* + * Make sure *all* the ITS are reset before we probe any, as + * they may be sharing memory. If any of the ITS fails to + * reset, don't even try to go any further, as this could + * result in something even worse. 
+ */ + if (acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR, + its_acpi_reset, 0) > 0) + acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR, + gic_acpi_parse_madt_its, 0); acpi_its_srat_maps_free(); } #else @@ -5438,6 +5517,9 @@ int __init its_lpi_memreserve_init(void) if (!efi_enabled(EFI_CONFIG_TABLES)) return 0; + if (list_empty(&its_nodes)) + return 0; + gic_rdists->cpuhp_memreserve_state = CPUHP_INVALID; state = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "irqchip/arm/gicv3/memreserve:online", diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c index 32562b7e681b53ade32eddb92c7bb6908c0bf6d6..e3801c4a77ed2cea3384b9c46606309bf4277837 100644 --- a/drivers/irqchip/irq-loongson-pch-msi.c +++ b/drivers/irqchip/irq-loongson-pch-msi.c @@ -241,7 +241,7 @@ static int pch_msi_init(struct device_node *node, return 0; err_map: - kfree(priv->msi_map); + bitmap_free(priv->msi_map); err_priv: kfree(priv); return ret; diff --git a/drivers/irqchip/irq-realtek-rtl.c b/drivers/irqchip/irq-realtek-rtl.c index fd9f275592d296a71ea981f931d2c877d8983743..50a56820c99bcf70b2bac17ad32ea37651dc11a5 100644 --- a/drivers/irqchip/irq-realtek-rtl.c +++ b/drivers/irqchip/irq-realtek-rtl.c @@ -62,7 +62,7 @@ static struct irq_chip realtek_ictl_irq = { static int intc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw) { - irq_set_chip_and_handler(hw, &realtek_ictl_irq, handle_level_irq); + irq_set_chip_and_handler(irq, &realtek_ictl_irq, handle_level_irq); return 0; } @@ -76,16 +76,20 @@ static void realtek_irq_dispatch(struct irq_desc *desc) { struct irq_chip *chip = irq_desc_get_chip(desc); struct irq_domain *domain; - unsigned int pending; + unsigned long pending; + unsigned int soc_int; chained_irq_enter(chip, desc); pending = readl(REG(RTL_ICTL_GIMR)) & readl(REG(RTL_ICTL_GISR)); + if (unlikely(!pending)) { spurious_interrupt(); goto out; } + domain = irq_desc_get_handler_data(desc); - generic_handle_domain_irq(domain, __ffs(pending)); + for_each_set_bit(soc_int, &pending, 32) + generic_handle_domain_irq(domain, soc_int); out: chained_irq_exit(chip, desc); @@ -95,7 +99,8 @@ out: * SoC interrupts are cascaded to MIPS CPU interrupts according to the * interrupt-map in the device tree. Each SoC interrupt gets 4 bits for * the CPU interrupt in an Interrupt Routing Register. Max 32 SoC interrupts - * thus go into 4 IRRs. + * thus go into 4 IRRs. A routing value of '0' means the interrupt is left + * disconnected. Routing values {1..15} connect to output lines {0..14}. 
*/ static int __init map_interrupts(struct device_node *node, struct irq_domain *domain) { @@ -134,7 +139,7 @@ static int __init map_interrupts(struct device_node *node, struct irq_domain *do of_node_put(cpu_ictl); cpu_int = be32_to_cpup(imap + 2); - if (cpu_int > 7) + if (cpu_int > 7 || cpu_int < 2) return -EINVAL; if (!(mips_irqs_set & BIT(cpu_int))) { @@ -143,7 +148,8 @@ static int __init map_interrupts(struct device_node *node, struct irq_domain *do mips_irqs_set |= BIT(cpu_int); } - regs[(soc_int * 4) / 32] |= cpu_int << (soc_int * 4) % 32; + /* Use routing values (1..6) for CPU interrupts (2..7) */ + regs[(soc_int * 4) / 32] |= (cpu_int - 1) << (soc_int * 4) % 32; imap += 3; } diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 259065d271ef0440fd47dfd78d38740c84009c07..09cc98266d30f9439fe1a383a2470b68c7416981 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -398,3 +398,4 @@ out_free_priv: IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init); IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */ +IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_init); /* for firmware driver */ diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c index 28b8581b44dda63af4b749167df9f63864e629c2..d8c4d5664145d285cad940f64fbc58eb74ade561 100644 --- a/drivers/macintosh/mac_hid.c +++ b/drivers/macintosh/mac_hid.c @@ -239,33 +239,11 @@ static struct ctl_table mac_hid_files[] = { { } }; -/* dir in /proc/sys/dev */ -static struct ctl_table mac_hid_dir[] = { - { - .procname = "mac_hid", - .maxlen = 0, - .mode = 0555, - .child = mac_hid_files, - }, - { } -}; - -/* /proc/sys/dev itself, in case that is not there yet */ -static struct ctl_table mac_hid_root_dir[] = { - { - .procname = "dev", - .maxlen = 0, - .mode = 0555, - .child = mac_hid_dir, - }, - { } -}; - static struct ctl_table_header *mac_hid_sysctl_header; static int __init mac_hid_init(void) { - mac_hid_sysctl_header = register_sysctl_table(mac_hid_root_dir); + mac_hid_sysctl_header = register_sysctl("dev/mac_hid", mac_hid_files); if (!mac_hid_sysctl_header) return -ENOMEM; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c0ae8087c60276ac8495d876bd686b43f9e78f59..dcbd6d201619d0fd62931af3de1e073648852d1f 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -489,7 +489,7 @@ static void start_io_acct(struct dm_io *io) struct mapped_device *md = io->md; struct bio *bio = io->orig_bio; - io->start_time = bio_start_io_acct(bio); + bio_start_io_acct_time(bio, io->start_time); if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio_data_dir(bio), bio->bi_iter.bi_sector, bio_sectors(bio), @@ -535,7 +535,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) io->md = md; spin_lock_init(&io->endio_lock); - start_io_acct(io); + io->start_time = jiffies; return io; } @@ -1442,9 +1442,6 @@ static void init_clone_info(struct clone_info *ci, struct mapped_device *md, ci->sector = bio->bi_iter.bi_sector; } -#define __dm_part_stat_sub(part, field, subnd) \ - (part_stat_get(part, field) -= (subnd)) - /* * Entry point to split a bio into clones and submit them to the targets. */ @@ -1480,23 +1477,12 @@ static void __split_and_process_bio(struct mapped_device *md, GFP_NOIO, &md->queue->bio_split); ci.io->orig_bio = b; - /* - * Adjust IO stats for each split, otherwise upon queue - * reentry there will be redundant IO accounting. 
- * NOTE: this is a stop-gap fix, a proper fix involves - * significant refactoring of DM core's bio splitting - * (by eliminating DM's splitting and just using bio_split) - */ - part_stat_lock(); - __dm_part_stat_sub(dm_disk(md)->part0, - sectors[op_stat_group(bio_op(bio))], ci.sector_count); - part_stat_unlock(); - bio_chain(b, bio); trace_block_split(b, bio->bi_iter.bi_sector); submit_bio_noacct(bio); } } + start_io_acct(ci.io); /* drop the extra reference count */ dm_io_dec_pending(ci.io, errno_to_blk_status(error)); diff --git a/drivers/md/md.c b/drivers/md/md.c index 5881d05a76ebc706e2fc7fe342c98023d1c7258b..4d38bd7dadd604acdc9832d62c81c913abddc07d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5869,10 +5869,6 @@ int md_run(struct mddev *mddev) nowait = nowait && blk_queue_nowait(bdev_get_queue(rdev->bdev)); } - /* Set the NOWAIT flags if all underlying devices support it */ - if (nowait) - blk_queue_flag_set(QUEUE_FLAG_NOWAIT, mddev->queue); - if (!bioset_initialized(&mddev->bio_set)) { err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS); if (err) @@ -6010,6 +6006,10 @@ int md_run(struct mddev *mddev) else blk_queue_flag_clear(QUEUE_FLAG_NONROT, mddev->queue); blk_queue_flag_set(QUEUE_FLAG_IO_STAT, mddev->queue); + + /* Set the NOWAIT flags if all underlying devices support it */ + if (nowait) + blk_queue_flag_set(QUEUE_FLAG_NOWAIT, mddev->queue); } if (pers->sync_request) { if (mddev->kobj.sd && diff --git a/drivers/media/cec/core/cec-core.c b/drivers/media/cec/core/cec-core.c index ec67065d52021c3db57411e24c59cb968a0c135e..a3ab6a43fb14af6e8974a83e2d65397854243e36 100644 --- a/drivers/media/cec/core/cec-core.c +++ b/drivers/media/cec/core/cec-core.c @@ -106,7 +106,7 @@ static int __must_check cec_devnode_register(struct cec_devnode *devnode, /* Part 1: Find a free minor number */ mutex_lock(&cec_devnode_lock); - minor = find_next_zero_bit(cec_devnode_nums, CEC_NUM_DEVICES, 0); + minor = find_first_zero_bit(cec_devnode_nums, CEC_NUM_DEVICES); if (minor == CEC_NUM_DEVICES) { mutex_unlock(&cec_devnode_lock); pr_err("could not get a free minor\n"); diff --git a/drivers/media/mc/mc-devnode.c b/drivers/media/mc/mc-devnode.c index f11382afe23bf280736244026890924d4f4729ba..680fbb3a9340224b6315a5b401c7489002a837ba 100644 --- a/drivers/media/mc/mc-devnode.c +++ b/drivers/media/mc/mc-devnode.c @@ -217,7 +217,7 @@ int __must_check media_devnode_register(struct media_device *mdev, /* Part 1: Find a free minor number */ mutex_lock(&media_devnode_lock); - minor = find_next_zero_bit(media_devnode_nums, MEDIA_NUM_DEVICES, 0); + minor = find_first_zero_bit(media_devnode_nums, MEDIA_NUM_DEVICES); if (minor == MEDIA_NUM_DEVICES) { mutex_unlock(&media_devnode_lock); pr_err("could not get a free minor\n"); diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index 24a4532053e4b94daa6382939bb92cbe426ea92d..e90adfa5795050bdd41a48005d088e793d9bcd24 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c @@ -300,8 +300,8 @@ mpt_is_discovery_complete(MPT_ADAPTER *ioc) if (!hdr.ExtPageLength) goto out; - buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - &dma_handle); + buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, + &dma_handle, GFP_KERNEL); if (!buffer) goto out; @@ -316,8 +316,8 @@ mpt_is_discovery_complete(MPT_ADAPTER *ioc) rc = 1; out_free_consistent: - pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - buffer, dma_handle); + dma_free_coherent(&ioc->pcidev->dev, 
hdr.ExtPageLength * 4, buffer, + dma_handle); out: return rc; } @@ -1661,16 +1661,14 @@ mpt_mapresources(MPT_ADAPTER *ioc) const uint64_t required_mask = dma_get_required_mask (&pdev->dev); if (required_mask > DMA_BIT_MASK(32) - && !pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) - && !pci_set_consistent_dma_mask(pdev, - DMA_BIT_MASK(64))) { + && !dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) + && !dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64))) { ioc->dma_mask = DMA_BIT_MASK(64); dinitprintk(ioc, printk(MYIOC_s_INFO_FMT ": 64 BIT PCI BUS DMA ADDRESSING SUPPORTED\n", ioc->name)); - } else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) - && !pci_set_consistent_dma_mask(pdev, - DMA_BIT_MASK(32))) { + } else if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)) + && !dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32))) { ioc->dma_mask = DMA_BIT_MASK(32); dinitprintk(ioc, printk(MYIOC_s_INFO_FMT ": 32 BIT PCI BUS DMA ADDRESSING SUPPORTED\n", @@ -1681,9 +1679,8 @@ mpt_mapresources(MPT_ADAPTER *ioc) goto out_pci_release_region; } } else { - if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) - && !pci_set_consistent_dma_mask(pdev, - DMA_BIT_MASK(32))) { + if (!dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)) + && !dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32))) { ioc->dma_mask = DMA_BIT_MASK(32); dinitprintk(ioc, printk(MYIOC_s_INFO_FMT ": 32 BIT PCI BUS DMA ADDRESSING SUPPORTED\n", @@ -2769,9 +2766,9 @@ mpt_adapter_disable(MPT_ADAPTER *ioc) if (ioc->spi_data.pIocPg4 != NULL) { sz = ioc->spi_data.IocPg4Sz; - pci_free_consistent(ioc->pcidev, sz, - ioc->spi_data.pIocPg4, - ioc->spi_data.IocPg4_dma); + dma_free_coherent(&ioc->pcidev->dev, sz, + ioc->spi_data.pIocPg4, + ioc->spi_data.IocPg4_dma); ioc->spi_data.pIocPg4 = NULL; ioc->alloc_total -= sz; } @@ -3515,7 +3512,8 @@ mpt_alloc_fw_memory(MPT_ADAPTER *ioc, int size) rc = 0; goto out; } - ioc->cached_fw = pci_alloc_consistent(ioc->pcidev, size, &ioc->cached_fw_dma); + ioc->cached_fw = dma_alloc_coherent(&ioc->pcidev->dev, size, + &ioc->cached_fw_dma, GFP_ATOMIC); if (!ioc->cached_fw) { printk(MYIOC_s_ERR_FMT "Unable to allocate memory for the cached firmware image!\n", ioc->name); @@ -3548,7 +3546,8 @@ mpt_free_fw_memory(MPT_ADAPTER *ioc) sz = ioc->facts.FWImageSize; dinitprintk(ioc, printk(MYIOC_s_DEBUG_FMT "free_fw_memory: FW Image @ %p[%p], sz=%d[%x] bytes\n", ioc->name, ioc->cached_fw, (void *)(ulong)ioc->cached_fw_dma, sz, sz)); - pci_free_consistent(ioc->pcidev, sz, ioc->cached_fw, ioc->cached_fw_dma); + dma_free_coherent(&ioc->pcidev->dev, sz, ioc->cached_fw, + ioc->cached_fw_dma); ioc->alloc_total -= sz; ioc->cached_fw = NULL; } @@ -4447,9 +4446,8 @@ PrimeIocFifos(MPT_ADAPTER *ioc) */ if (ioc->pcidev->device == MPI_MANUFACTPAGE_DEVID_SAS1078 && ioc->dma_mask > DMA_BIT_MASK(35)) { - if (!pci_set_dma_mask(ioc->pcidev, DMA_BIT_MASK(32)) - && !pci_set_consistent_dma_mask(ioc->pcidev, - DMA_BIT_MASK(32))) { + if (!dma_set_mask(&ioc->pcidev->dev, DMA_BIT_MASK(32)) + && !dma_set_coherent_mask(&ioc->pcidev->dev, DMA_BIT_MASK(32))) { dma_mask = DMA_BIT_MASK(35); d36memprintk(ioc, printk(MYIOC_s_DEBUG_FMT "setting 35 bit addressing for " @@ -4457,10 +4455,10 @@ PrimeIocFifos(MPT_ADAPTER *ioc) ioc->name)); } else { /*Reseting DMA mask to 64 bit*/ - pci_set_dma_mask(ioc->pcidev, - DMA_BIT_MASK(64)); - pci_set_consistent_dma_mask(ioc->pcidev, - DMA_BIT_MASK(64)); + dma_set_mask(&ioc->pcidev->dev, + DMA_BIT_MASK(64)); + dma_set_coherent_mask(&ioc->pcidev->dev, + DMA_BIT_MASK(64)); printk(MYIOC_s_ERR_FMT "failed setting 35 bit addressing for " @@ -4595,8 +4593,8 @@ 
PrimeIocFifos(MPT_ADAPTER *ioc) alloc_dma += ioc->reply_sz; } - if (dma_mask == DMA_BIT_MASK(35) && !pci_set_dma_mask(ioc->pcidev, - ioc->dma_mask) && !pci_set_consistent_dma_mask(ioc->pcidev, + if (dma_mask == DMA_BIT_MASK(35) && !dma_set_mask(&ioc->pcidev->dev, + ioc->dma_mask) && !dma_set_coherent_mask(&ioc->pcidev->dev, ioc->dma_mask)) d36memprintk(ioc, printk(MYIOC_s_DEBUG_FMT "restoring 64 bit addressing\n", ioc->name)); @@ -4620,8 +4618,8 @@ out_fail: ioc->sense_buf_pool = NULL; } - if (dma_mask == DMA_BIT_MASK(35) && !pci_set_dma_mask(ioc->pcidev, - DMA_BIT_MASK(64)) && !pci_set_consistent_dma_mask(ioc->pcidev, + if (dma_mask == DMA_BIT_MASK(35) && !dma_set_mask(&ioc->pcidev->dev, + DMA_BIT_MASK(64)) && !dma_set_coherent_mask(&ioc->pcidev->dev, DMA_BIT_MASK(64))) d36memprintk(ioc, printk(MYIOC_s_DEBUG_FMT "restoring 64 bit addressing\n", ioc->name)); @@ -4968,7 +4966,8 @@ GetLanConfigPages(MPT_ADAPTER *ioc) if (hdr.PageLength > 0) { data_sz = hdr.PageLength * 4; - ppage0_alloc = pci_alloc_consistent(ioc->pcidev, data_sz, &page0_dma); + ppage0_alloc = dma_alloc_coherent(&ioc->pcidev->dev, data_sz, + &page0_dma, GFP_KERNEL); rc = -ENOMEM; if (ppage0_alloc) { memset((u8 *)ppage0_alloc, 0, data_sz); @@ -4982,7 +4981,8 @@ GetLanConfigPages(MPT_ADAPTER *ioc) } - pci_free_consistent(ioc->pcidev, data_sz, (u8 *) ppage0_alloc, page0_dma); + dma_free_coherent(&ioc->pcidev->dev, data_sz, + (u8 *)ppage0_alloc, page0_dma); /* FIXME! * Normalize endianness of structure data, @@ -5014,7 +5014,8 @@ GetLanConfigPages(MPT_ADAPTER *ioc) data_sz = hdr.PageLength * 4; rc = -ENOMEM; - ppage1_alloc = pci_alloc_consistent(ioc->pcidev, data_sz, &page1_dma); + ppage1_alloc = dma_alloc_coherent(&ioc->pcidev->dev, data_sz, + &page1_dma, GFP_KERNEL); if (ppage1_alloc) { memset((u8 *)ppage1_alloc, 0, data_sz); cfg.physAddr = page1_dma; @@ -5026,7 +5027,8 @@ GetLanConfigPages(MPT_ADAPTER *ioc) memcpy(&ioc->lan_cnfg_page1, ppage1_alloc, copy_sz); } - pci_free_consistent(ioc->pcidev, data_sz, (u8 *) ppage1_alloc, page1_dma); + dma_free_coherent(&ioc->pcidev->dev, data_sz, + (u8 *)ppage1_alloc, page1_dma); /* FIXME! 
* Normalize endianness of structure data, @@ -5315,7 +5317,8 @@ GetIoUnitPage2(MPT_ADAPTER *ioc) /* Read the config page */ data_sz = hdr.PageLength * 4; rc = -ENOMEM; - ppage_alloc = pci_alloc_consistent(ioc->pcidev, data_sz, &page_dma); + ppage_alloc = dma_alloc_coherent(&ioc->pcidev->dev, data_sz, + &page_dma, GFP_KERNEL); if (ppage_alloc) { memset((u8 *)ppage_alloc, 0, data_sz); cfg.physAddr = page_dma; @@ -5325,7 +5328,8 @@ GetIoUnitPage2(MPT_ADAPTER *ioc) if ((rc = mpt_config(ioc, &cfg)) == 0) ioc->biosVersion = le32_to_cpu(ppage_alloc->BiosVersion); - pci_free_consistent(ioc->pcidev, data_sz, (u8 *) ppage_alloc, page_dma); + dma_free_coherent(&ioc->pcidev->dev, data_sz, + (u8 *)ppage_alloc, page_dma); } return rc; @@ -5400,7 +5404,9 @@ mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum) return -EFAULT; if (header.PageLength > 0) { - pbuf = pci_alloc_consistent(ioc->pcidev, header.PageLength * 4, &buf_dma); + pbuf = dma_alloc_coherent(&ioc->pcidev->dev, + header.PageLength * 4, &buf_dma, + GFP_KERNEL); if (pbuf) { cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT; cfg.physAddr = buf_dma; @@ -5456,7 +5462,9 @@ mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum) } } if (pbuf) { - pci_free_consistent(ioc->pcidev, header.PageLength * 4, pbuf, buf_dma); + dma_free_coherent(&ioc->pcidev->dev, + header.PageLength * 4, pbuf, + buf_dma); } } } @@ -5478,7 +5486,9 @@ mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum) if (header.PageLength > 0) { /* Allocate memory and read SCSI Port Page 2 */ - pbuf = pci_alloc_consistent(ioc->pcidev, header.PageLength * 4, &buf_dma); + pbuf = dma_alloc_coherent(&ioc->pcidev->dev, + header.PageLength * 4, &buf_dma, + GFP_KERNEL); if (pbuf) { cfg.action = MPI_CONFIG_ACTION_PAGE_READ_NVRAM; cfg.physAddr = buf_dma; @@ -5543,7 +5553,9 @@ mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum) } } - pci_free_consistent(ioc->pcidev, header.PageLength * 4, pbuf, buf_dma); + dma_free_coherent(&ioc->pcidev->dev, + header.PageLength * 4, pbuf, + buf_dma); } } @@ -5659,8 +5671,8 @@ mpt_inactive_raid_volumes(MPT_ADAPTER *ioc, u8 channel, u8 id) if (!hdr.PageLength) goto out; - buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4, - &dma_handle); + buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4, + &dma_handle, GFP_KERNEL); if (!buffer) goto out; @@ -5707,8 +5719,8 @@ mpt_inactive_raid_volumes(MPT_ADAPTER *ioc, u8 channel, u8 id) out: if (buffer) - pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer, - dma_handle); + dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4, + buffer, dma_handle); } /** @@ -5752,8 +5764,8 @@ mpt_raid_phys_disk_pg0(MPT_ADAPTER *ioc, u8 phys_disk_num, goto out; } - buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4, - &dma_handle); + buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4, + &dma_handle, GFP_KERNEL); if (!buffer) { rc = -ENOMEM; @@ -5776,8 +5788,8 @@ mpt_raid_phys_disk_pg0(MPT_ADAPTER *ioc, u8 phys_disk_num, out: if (buffer) - pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer, - dma_handle); + dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4, + buffer, dma_handle); return rc; } @@ -5819,8 +5831,8 @@ mpt_raid_phys_disk_get_num_paths(MPT_ADAPTER *ioc, u8 phys_disk_num) goto out; } - buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4, - &dma_handle); + buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4, + &dma_handle, GFP_KERNEL); if (!buffer) { rc = 0; @@ -5840,8 +5852,8 @@ mpt_raid_phys_disk_get_num_paths(MPT_ADAPTER *ioc, 
u8 phys_disk_num) out: if (buffer) - pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer, - dma_handle); + dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4, + buffer, dma_handle); return rc; } @@ -5891,8 +5903,8 @@ mpt_raid_phys_disk_pg1(MPT_ADAPTER *ioc, u8 phys_disk_num, goto out; } - buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4, - &dma_handle); + buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4, + &dma_handle, GFP_KERNEL); if (!buffer) { rc = -ENOMEM; @@ -5929,8 +5941,8 @@ mpt_raid_phys_disk_pg1(MPT_ADAPTER *ioc, u8 phys_disk_num, out: if (buffer) - pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer, - dma_handle); + dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4, + buffer, dma_handle); return rc; } @@ -5986,7 +5998,8 @@ mpt_findImVolumes(MPT_ADAPTER *ioc) return -EFAULT; iocpage2sz = header.PageLength * 4; - pIoc2 = pci_alloc_consistent(ioc->pcidev, iocpage2sz, &ioc2_dma); + pIoc2 = dma_alloc_coherent(&ioc->pcidev->dev, iocpage2sz, &ioc2_dma, + GFP_KERNEL); if (!pIoc2) return -ENOMEM; @@ -6011,7 +6024,7 @@ mpt_findImVolumes(MPT_ADAPTER *ioc) pIoc2->RaidVolume[i].VolumeID); out: - pci_free_consistent(ioc->pcidev, iocpage2sz, pIoc2, ioc2_dma); + dma_free_coherent(&ioc->pcidev->dev, iocpage2sz, pIoc2, ioc2_dma); return rc; } @@ -6053,7 +6066,8 @@ mpt_read_ioc_pg_3(MPT_ADAPTER *ioc) /* Read Header good, alloc memory */ iocpage3sz = header.PageLength * 4; - pIoc3 = pci_alloc_consistent(ioc->pcidev, iocpage3sz, &ioc3_dma); + pIoc3 = dma_alloc_coherent(&ioc->pcidev->dev, iocpage3sz, &ioc3_dma, + GFP_KERNEL); if (!pIoc3) return 0; @@ -6070,7 +6084,7 @@ mpt_read_ioc_pg_3(MPT_ADAPTER *ioc) } } - pci_free_consistent(ioc->pcidev, iocpage3sz, pIoc3, ioc3_dma); + dma_free_coherent(&ioc->pcidev->dev, iocpage3sz, pIoc3, ioc3_dma); return 0; } @@ -6104,7 +6118,8 @@ mpt_read_ioc_pg_4(MPT_ADAPTER *ioc) if ( (pIoc4 = ioc->spi_data.pIocPg4) == NULL ) { iocpage4sz = (header.PageLength + 4) * 4; /* Allow 4 additional SEP's */ - pIoc4 = pci_alloc_consistent(ioc->pcidev, iocpage4sz, &ioc4_dma); + pIoc4 = dma_alloc_coherent(&ioc->pcidev->dev, iocpage4sz, + &ioc4_dma, GFP_KERNEL); if (!pIoc4) return; ioc->alloc_total += iocpage4sz; @@ -6122,7 +6137,8 @@ mpt_read_ioc_pg_4(MPT_ADAPTER *ioc) ioc->spi_data.IocPg4_dma = ioc4_dma; ioc->spi_data.IocPg4Sz = iocpage4sz; } else { - pci_free_consistent(ioc->pcidev, iocpage4sz, pIoc4, ioc4_dma); + dma_free_coherent(&ioc->pcidev->dev, iocpage4sz, pIoc4, + ioc4_dma); ioc->spi_data.pIocPg4 = NULL; ioc->alloc_total -= iocpage4sz; } @@ -6159,7 +6175,8 @@ mpt_read_ioc_pg_1(MPT_ADAPTER *ioc) /* Read Header good, alloc memory */ iocpage1sz = header.PageLength * 4; - pIoc1 = pci_alloc_consistent(ioc->pcidev, iocpage1sz, &ioc1_dma); + pIoc1 = dma_alloc_coherent(&ioc->pcidev->dev, iocpage1sz, &ioc1_dma, + GFP_KERNEL); if (!pIoc1) return; @@ -6210,7 +6227,7 @@ mpt_read_ioc_pg_1(MPT_ADAPTER *ioc) } } - pci_free_consistent(ioc->pcidev, iocpage1sz, pIoc1, ioc1_dma); + dma_free_coherent(&ioc->pcidev->dev, iocpage1sz, pIoc1, ioc1_dma); return; } @@ -6239,7 +6256,8 @@ mpt_get_manufacturing_pg_0(MPT_ADAPTER *ioc) goto out; cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT; - pbuf = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4, &buf_dma); + pbuf = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4, + &buf_dma, GFP_KERNEL); if (!pbuf) goto out; @@ -6255,7 +6273,8 @@ mpt_get_manufacturing_pg_0(MPT_ADAPTER *ioc) out: if (pbuf) - pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, pbuf, buf_dma); + 
dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4, pbuf, + buf_dma); } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c index ae433c150b37aa0bf9b5267bfbf8d3fe6f69697d..03c8fb1795c2cde9559cb44a83e43e70ef0efa1e 100644 --- a/drivers/message/fusion/mptctl.c +++ b/drivers/message/fusion/mptctl.c @@ -1041,14 +1041,15 @@ kbuf_alloc_2_sgl(int bytes, u32 sgdir, int sge_offset, int *frags, * copying the data in this array into the correct place in the * request and chain buffers. */ - sglbuf = pci_alloc_consistent(ioc->pcidev, MAX_SGL_BYTES, sglbuf_dma); + sglbuf = dma_alloc_coherent(&ioc->pcidev->dev, MAX_SGL_BYTES, + sglbuf_dma, GFP_KERNEL); if (sglbuf == NULL) goto free_and_fail; if (sgdir & 0x04000000) - dir = PCI_DMA_TODEVICE; + dir = DMA_TO_DEVICE; else - dir = PCI_DMA_FROMDEVICE; + dir = DMA_FROM_DEVICE; /* At start: * sgl = sglbuf = point to beginning of sg buffer @@ -1062,9 +1063,9 @@ kbuf_alloc_2_sgl(int bytes, u32 sgdir, int sge_offset, int *frags, while (bytes_allocd < bytes) { this_alloc = min(alloc_sz, bytes-bytes_allocd); buflist[buflist_ent].len = this_alloc; - buflist[buflist_ent].kptr = pci_alloc_consistent(ioc->pcidev, - this_alloc, - &pa); + buflist[buflist_ent].kptr = dma_alloc_coherent(&ioc->pcidev->dev, + this_alloc, + &pa, GFP_KERNEL); if (buflist[buflist_ent].kptr == NULL) { alloc_sz = alloc_sz / 2; if (alloc_sz == 0) { @@ -1080,8 +1081,9 @@ kbuf_alloc_2_sgl(int bytes, u32 sgdir, int sge_offset, int *frags, bytes_allocd += this_alloc; sgl->FlagsLength = (0x10000000|sgdir|this_alloc); - dma_addr = pci_map_single(ioc->pcidev, - buflist[buflist_ent].kptr, this_alloc, dir); + dma_addr = dma_map_single(&ioc->pcidev->dev, + buflist[buflist_ent].kptr, + this_alloc, dir); sgl->Address = dma_addr; fragcnt++; @@ -1140,9 +1142,11 @@ free_and_fail: kptr = buflist[i].kptr; len = buflist[i].len; - pci_free_consistent(ioc->pcidev, len, kptr, dma_addr); + dma_free_coherent(&ioc->pcidev->dev, len, kptr, + dma_addr); } - pci_free_consistent(ioc->pcidev, MAX_SGL_BYTES, sglbuf, *sglbuf_dma); + dma_free_coherent(&ioc->pcidev->dev, MAX_SGL_BYTES, sglbuf, + *sglbuf_dma); } kfree(buflist); return NULL; @@ -1162,9 +1166,9 @@ kfree_sgl(MptSge_t *sgl, dma_addr_t sgl_dma, struct buflist *buflist, MPT_ADAPTE int n = 0; if (sg->FlagsLength & 0x04000000) - dir = PCI_DMA_TODEVICE; + dir = DMA_TO_DEVICE; else - dir = PCI_DMA_FROMDEVICE; + dir = DMA_FROM_DEVICE; nib = (sg->FlagsLength & 0xF0000000) >> 28; while (! 
(nib & 0x4)) { /* eob */ @@ -1179,8 +1183,10 @@ kfree_sgl(MptSge_t *sgl, dma_addr_t sgl_dma, struct buflist *buflist, MPT_ADAPTE dma_addr = sg->Address; kptr = bl->kptr; len = bl->len; - pci_unmap_single(ioc->pcidev, dma_addr, len, dir); - pci_free_consistent(ioc->pcidev, len, kptr, dma_addr); + dma_unmap_single(&ioc->pcidev->dev, dma_addr, len, + dir); + dma_free_coherent(&ioc->pcidev->dev, len, kptr, + dma_addr); n++; } sg++; @@ -1197,12 +1203,12 @@ kfree_sgl(MptSge_t *sgl, dma_addr_t sgl_dma, struct buflist *buflist, MPT_ADAPTE dma_addr = sg->Address; kptr = bl->kptr; len = bl->len; - pci_unmap_single(ioc->pcidev, dma_addr, len, dir); - pci_free_consistent(ioc->pcidev, len, kptr, dma_addr); + dma_unmap_single(&ioc->pcidev->dev, dma_addr, len, dir); + dma_free_coherent(&ioc->pcidev->dev, len, kptr, dma_addr); n++; } - pci_free_consistent(ioc->pcidev, MAX_SGL_BYTES, sgl, sgl_dma); + dma_free_coherent(&ioc->pcidev->dev, MAX_SGL_BYTES, sgl, sgl_dma); kfree(buflist); dctlprintk(ioc, printk(MYIOC_s_DEBUG_FMT "-SG: Free'd 1 SGL buf + %d kbufs!\n", ioc->name, n)); @@ -2100,8 +2106,9 @@ mptctl_do_mpt_command (MPT_ADAPTER *ioc, struct mpt_ioctl_command karg, void __u } flagsLength |= karg.dataOutSize; bufOut.len = karg.dataOutSize; - bufOut.kptr = pci_alloc_consistent( - ioc->pcidev, bufOut.len, &dma_addr_out); + bufOut.kptr = dma_alloc_coherent(&ioc->pcidev->dev, + bufOut.len, + &dma_addr_out, GFP_KERNEL); if (bufOut.kptr == NULL) { rc = -ENOMEM; @@ -2134,8 +2141,9 @@ mptctl_do_mpt_command (MPT_ADAPTER *ioc, struct mpt_ioctl_command karg, void __u flagsLength |= karg.dataInSize; bufIn.len = karg.dataInSize; - bufIn.kptr = pci_alloc_consistent(ioc->pcidev, - bufIn.len, &dma_addr_in); + bufIn.kptr = dma_alloc_coherent(&ioc->pcidev->dev, + bufIn.len, + &dma_addr_in, GFP_KERNEL); if (bufIn.kptr == NULL) { rc = -ENOMEM; @@ -2283,13 +2291,13 @@ done_free_mem: /* Free the allocated memory. 
*/ if (bufOut.kptr != NULL) { - pci_free_consistent(ioc->pcidev, - bufOut.len, (void *) bufOut.kptr, dma_addr_out); + dma_free_coherent(&ioc->pcidev->dev, bufOut.len, + (void *)bufOut.kptr, dma_addr_out); } if (bufIn.kptr != NULL) { - pci_free_consistent(ioc->pcidev, - bufIn.len, (void *) bufIn.kptr, dma_addr_in); + dma_free_coherent(&ioc->pcidev->dev, bufIn.len, + (void *)bufIn.kptr, dma_addr_in); } /* mf is null if command issued successfully @@ -2395,7 +2403,9 @@ mptctl_hp_hostinfo(MPT_ADAPTER *ioc, unsigned long arg, unsigned int data_size) /* Issue the second config page request */ cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT; - pbuf = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4, &buf_dma); + pbuf = dma_alloc_coherent(&ioc->pcidev->dev, + hdr.PageLength * 4, + &buf_dma, GFP_KERNEL); if (pbuf) { cfg.physAddr = buf_dma; if (mpt_config(ioc, &cfg) == 0) { @@ -2405,7 +2415,9 @@ mptctl_hp_hostinfo(MPT_ADAPTER *ioc, unsigned long arg, unsigned int data_size) pdata->BoardTracerNumber, 24); } } - pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, pbuf, buf_dma); + dma_free_coherent(&ioc->pcidev->dev, + hdr.PageLength * 4, pbuf, + buf_dma); pbuf = NULL; } } @@ -2470,7 +2482,7 @@ mptctl_hp_hostinfo(MPT_ADAPTER *ioc, unsigned long arg, unsigned int data_size) else IstwiRWRequest->DeviceAddr = 0xB0; - pbuf = pci_alloc_consistent(ioc->pcidev, 4, &buf_dma); + pbuf = dma_alloc_coherent(&ioc->pcidev->dev, 4, &buf_dma, GFP_KERNEL); if (!pbuf) goto out; ioc->add_sge((char *)&IstwiRWRequest->SGL, @@ -2519,7 +2531,7 @@ retry_wait: SET_MGMT_MSG_CONTEXT(ioc->ioctl_cmds.msg_context, 0); if (pbuf) - pci_free_consistent(ioc->pcidev, 4, pbuf, buf_dma); + dma_free_coherent(&ioc->pcidev->dev, 4, pbuf, buf_dma); /* Copy the data from kernel memory to user memory */ @@ -2585,7 +2597,8 @@ mptctl_hp_targetinfo(MPT_ADAPTER *ioc, unsigned long arg) /* Get the data transfer speeds */ data_sz = ioc->spi_data.sdp0length * 4; - pg0_alloc = pci_alloc_consistent(ioc->pcidev, data_sz, &page_dma); + pg0_alloc = dma_alloc_coherent(&ioc->pcidev->dev, data_sz, &page_dma, + GFP_KERNEL); if (pg0_alloc) { hdr.PageVersion = ioc->spi_data.sdp0version; hdr.PageLength = data_sz; @@ -2623,7 +2636,8 @@ mptctl_hp_targetinfo(MPT_ADAPTER *ioc, unsigned long arg) karg.negotiated_speed = HP_DEV_SPEED_ASYNC; } - pci_free_consistent(ioc->pcidev, data_sz, (u8 *) pg0_alloc, page_dma); + dma_free_coherent(&ioc->pcidev->dev, data_sz, (u8 *)pg0_alloc, + page_dma); } /* Set defaults @@ -2649,7 +2663,8 @@ mptctl_hp_targetinfo(MPT_ADAPTER *ioc, unsigned long arg) /* Issue the second config page request */ cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT; data_sz = (int) cfg.cfghdr.hdr->PageLength * 4; - pg3_alloc = pci_alloc_consistent(ioc->pcidev, data_sz, &page_dma); + pg3_alloc = dma_alloc_coherent(&ioc->pcidev->dev, data_sz, + &page_dma, GFP_KERNEL); if (pg3_alloc) { cfg.physAddr = page_dma; cfg.pageAddr = (karg.hdr.channel << 8) | karg.hdr.id; @@ -2658,7 +2673,8 @@ mptctl_hp_targetinfo(MPT_ADAPTER *ioc, unsigned long arg) karg.phase_errors = (u32) le16_to_cpu(pg3_alloc->PhaseErrorCount); karg.parity_errors = (u32) le16_to_cpu(pg3_alloc->ParityErrorCount); } - pci_free_consistent(ioc->pcidev, data_sz, (u8 *) pg3_alloc, page_dma); + dma_free_coherent(&ioc->pcidev->dev, data_sz, + (u8 *)pg3_alloc, page_dma); } } hd = shost_priv(ioc->sh); diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c index 117fa4ebf6d79d836899db56e6cb15bd1d05ae22..142eb5d5d9df69e8c974112b2a649acb8194635b 100644 --- 
a/drivers/message/fusion/mptlan.c +++ b/drivers/message/fusion/mptlan.c @@ -516,9 +516,9 @@ mpt_lan_close(struct net_device *dev) if (priv->RcvCtl[i].skb != NULL) { /**/ dlprintk((KERN_INFO MYNAM "/lan_close: bucket %05x " /**/ "is still out\n", i)); - pci_unmap_single(mpt_dev->pcidev, priv->RcvCtl[i].dma, - priv->RcvCtl[i].len, - PCI_DMA_FROMDEVICE); + dma_unmap_single(&mpt_dev->pcidev->dev, + priv->RcvCtl[i].dma, + priv->RcvCtl[i].len, DMA_FROM_DEVICE); dev_kfree_skb(priv->RcvCtl[i].skb); } } @@ -528,9 +528,9 @@ mpt_lan_close(struct net_device *dev) for (i = 0; i < priv->tx_max_out; i++) { if (priv->SendCtl[i].skb != NULL) { - pci_unmap_single(mpt_dev->pcidev, priv->SendCtl[i].dma, - priv->SendCtl[i].len, - PCI_DMA_TODEVICE); + dma_unmap_single(&mpt_dev->pcidev->dev, + priv->SendCtl[i].dma, + priv->SendCtl[i].len, DMA_TO_DEVICE); dev_kfree_skb(priv->SendCtl[i].skb); } } @@ -582,8 +582,8 @@ mpt_lan_send_turbo(struct net_device *dev, u32 tmsg) __func__, sent)); priv->SendCtl[ctx].skb = NULL; - pci_unmap_single(mpt_dev->pcidev, priv->SendCtl[ctx].dma, - priv->SendCtl[ctx].len, PCI_DMA_TODEVICE); + dma_unmap_single(&mpt_dev->pcidev->dev, priv->SendCtl[ctx].dma, + priv->SendCtl[ctx].len, DMA_TO_DEVICE); dev_kfree_skb_irq(sent); spin_lock_irqsave(&priv->txfidx_lock, flags); @@ -648,8 +648,9 @@ mpt_lan_send_reply(struct net_device *dev, LANSendReply_t *pSendRep) __func__, sent)); priv->SendCtl[ctx].skb = NULL; - pci_unmap_single(mpt_dev->pcidev, priv->SendCtl[ctx].dma, - priv->SendCtl[ctx].len, PCI_DMA_TODEVICE); + dma_unmap_single(&mpt_dev->pcidev->dev, + priv->SendCtl[ctx].dma, + priv->SendCtl[ctx].len, DMA_TO_DEVICE); dev_kfree_skb_irq(sent); priv->mpt_txfidx[++priv->mpt_txfidx_tail] = ctx; @@ -720,8 +721,8 @@ mpt_lan_sdu_send (struct sk_buff *skb, struct net_device *dev) skb_reset_mac_header(skb); skb_pull(skb, 12); - dma = pci_map_single(mpt_dev->pcidev, skb->data, skb->len, - PCI_DMA_TODEVICE); + dma = dma_map_single(&mpt_dev->pcidev->dev, skb->data, skb->len, + DMA_TO_DEVICE); priv->SendCtl[ctx].skb = skb; priv->SendCtl[ctx].dma = dma; @@ -868,13 +869,17 @@ mpt_lan_receive_post_turbo(struct net_device *dev, u32 tmsg) return -ENOMEM; } - pci_dma_sync_single_for_cpu(mpt_dev->pcidev, priv->RcvCtl[ctx].dma, - priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&mpt_dev->pcidev->dev, + priv->RcvCtl[ctx].dma, + priv->RcvCtl[ctx].len, + DMA_FROM_DEVICE); skb_copy_from_linear_data(old_skb, skb_put(skb, len), len); - pci_dma_sync_single_for_device(mpt_dev->pcidev, priv->RcvCtl[ctx].dma, - priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&mpt_dev->pcidev->dev, + priv->RcvCtl[ctx].dma, + priv->RcvCtl[ctx].len, + DMA_FROM_DEVICE); goto out; } @@ -882,8 +887,8 @@ mpt_lan_receive_post_turbo(struct net_device *dev, u32 tmsg) priv->RcvCtl[ctx].skb = NULL; - pci_unmap_single(mpt_dev->pcidev, priv->RcvCtl[ctx].dma, - priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE); + dma_unmap_single(&mpt_dev->pcidev->dev, priv->RcvCtl[ctx].dma, + priv->RcvCtl[ctx].len, DMA_FROM_DEVICE); out: spin_lock_irqsave(&priv->rxfidx_lock, flags); @@ -927,8 +932,8 @@ mpt_lan_receive_post_free(struct net_device *dev, // dlprintk((KERN_INFO MYNAM "@rpr[2] TC + 3\n")); priv->RcvCtl[ctx].skb = NULL; - pci_unmap_single(mpt_dev->pcidev, priv->RcvCtl[ctx].dma, - priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE); + dma_unmap_single(&mpt_dev->pcidev->dev, priv->RcvCtl[ctx].dma, + priv->RcvCtl[ctx].len, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); priv->mpt_rxfidx[++priv->mpt_rxfidx_tail] = ctx; @@ -1028,16 
+1033,16 @@ mpt_lan_receive_post_reply(struct net_device *dev, // IOC_AND_NETDEV_NAMES_s_s(dev), // i, l)); - pci_dma_sync_single_for_cpu(mpt_dev->pcidev, - priv->RcvCtl[ctx].dma, - priv->RcvCtl[ctx].len, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&mpt_dev->pcidev->dev, + priv->RcvCtl[ctx].dma, + priv->RcvCtl[ctx].len, + DMA_FROM_DEVICE); skb_copy_from_linear_data(old_skb, skb_put(skb, l), l); - pci_dma_sync_single_for_device(mpt_dev->pcidev, - priv->RcvCtl[ctx].dma, - priv->RcvCtl[ctx].len, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&mpt_dev->pcidev->dev, + priv->RcvCtl[ctx].dma, + priv->RcvCtl[ctx].len, + DMA_FROM_DEVICE); priv->mpt_rxfidx[++priv->mpt_rxfidx_tail] = ctx; szrem -= l; @@ -1056,17 +1061,17 @@ mpt_lan_receive_post_reply(struct net_device *dev, return -ENOMEM; } - pci_dma_sync_single_for_cpu(mpt_dev->pcidev, - priv->RcvCtl[ctx].dma, - priv->RcvCtl[ctx].len, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(&mpt_dev->pcidev->dev, + priv->RcvCtl[ctx].dma, + priv->RcvCtl[ctx].len, + DMA_FROM_DEVICE); skb_copy_from_linear_data(old_skb, skb_put(skb, len), len); - pci_dma_sync_single_for_device(mpt_dev->pcidev, - priv->RcvCtl[ctx].dma, - priv->RcvCtl[ctx].len, - PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(&mpt_dev->pcidev->dev, + priv->RcvCtl[ctx].dma, + priv->RcvCtl[ctx].len, + DMA_FROM_DEVICE); spin_lock_irqsave(&priv->rxfidx_lock, flags); priv->mpt_rxfidx[++priv->mpt_rxfidx_tail] = ctx; @@ -1077,8 +1082,8 @@ mpt_lan_receive_post_reply(struct net_device *dev, priv->RcvCtl[ctx].skb = NULL; - pci_unmap_single(mpt_dev->pcidev, priv->RcvCtl[ctx].dma, - priv->RcvCtl[ctx].len, PCI_DMA_FROMDEVICE); + dma_unmap_single(&mpt_dev->pcidev->dev, priv->RcvCtl[ctx].dma, + priv->RcvCtl[ctx].len, DMA_FROM_DEVICE); priv->RcvCtl[ctx].dma = 0; priv->mpt_rxfidx[++priv->mpt_rxfidx_tail] = ctx; @@ -1199,10 +1204,10 @@ mpt_lan_post_receive_buckets(struct mpt_lan_priv *priv) skb = priv->RcvCtl[ctx].skb; if (skb && (priv->RcvCtl[ctx].len != len)) { - pci_unmap_single(mpt_dev->pcidev, + dma_unmap_single(&mpt_dev->pcidev->dev, priv->RcvCtl[ctx].dma, priv->RcvCtl[ctx].len, - PCI_DMA_FROMDEVICE); + DMA_FROM_DEVICE); dev_kfree_skb(priv->RcvCtl[ctx].skb); skb = priv->RcvCtl[ctx].skb = NULL; } @@ -1218,8 +1223,9 @@ mpt_lan_post_receive_buckets(struct mpt_lan_priv *priv) break; } - dma = pci_map_single(mpt_dev->pcidev, skb->data, - len, PCI_DMA_FROMDEVICE); + dma = dma_map_single(&mpt_dev->pcidev->dev, + skb->data, len, + DMA_FROM_DEVICE); priv->RcvCtl[ctx].skb = skb; priv->RcvCtl[ctx].dma = dma; diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index 091b45024d34349537d95685ad8666a74f921bed..4acd8f9a48e16eb8d5508e5b04d0cf6246f16050 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -702,8 +702,8 @@ mptsas_add_device_component_starget_ir(MPT_ADAPTER *ioc, if (!hdr.PageLength) goto out; - buffer = pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4, - &dma_handle); + buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4, + &dma_handle, GFP_KERNEL); if (!buffer) goto out; @@ -769,8 +769,8 @@ mptsas_add_device_component_starget_ir(MPT_ADAPTER *ioc, out: if (buffer) - pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer, - dma_handle); + dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4, + buffer, dma_handle); } /** @@ -1399,8 +1399,8 @@ mptsas_sas_enclosure_pg0(MPT_ADAPTER *ioc, struct mptsas_enclosure *enclosure, goto out; } - buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - &dma_handle); 
+ buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, + &dma_handle, GFP_KERNEL); if (!buffer) { error = -ENOMEM; goto out; @@ -1426,8 +1426,8 @@ mptsas_sas_enclosure_pg0(MPT_ADAPTER *ioc, struct mptsas_enclosure *enclosure, enclosure->sep_channel = buffer->SEPBus; out_free_consistent: - pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - buffer, dma_handle); + dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer, + dma_handle); out: return error; } @@ -2058,8 +2058,8 @@ static int mptsas_get_linkerrors(struct sas_phy *phy) if (!hdr.ExtPageLength) return -ENXIO; - buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - &dma_handle); + buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, + &dma_handle, GFP_KERNEL); if (!buffer) return -ENOMEM; @@ -2081,8 +2081,8 @@ static int mptsas_get_linkerrors(struct sas_phy *phy) le32_to_cpu(buffer->PhyResetProblemCount); out_free_consistent: - pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - buffer, dma_handle); + dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer, + dma_handle); return error; } @@ -2301,7 +2301,7 @@ static void mptsas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost, << MPI_SGE_FLAGS_SHIFT; if (!dma_map_sg(&ioc->pcidev->dev, job->request_payload.sg_list, - 1, PCI_DMA_BIDIRECTIONAL)) + 1, DMA_BIDIRECTIONAL)) goto put_mf; flagsLength |= (sg_dma_len(job->request_payload.sg_list) - 4); @@ -2318,7 +2318,7 @@ static void mptsas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost, flagsLength = flagsLength << MPI_SGE_FLAGS_SHIFT; if (!dma_map_sg(&ioc->pcidev->dev, job->reply_payload.sg_list, - 1, PCI_DMA_BIDIRECTIONAL)) + 1, DMA_BIDIRECTIONAL)) goto unmap_out; flagsLength |= sg_dma_len(job->reply_payload.sg_list) + 4; ioc->add_sge(psge, flagsLength, @@ -2356,10 +2356,10 @@ static void mptsas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost, unmap_in: dma_unmap_sg(&ioc->pcidev->dev, job->reply_payload.sg_list, 1, - PCI_DMA_BIDIRECTIONAL); + DMA_BIDIRECTIONAL); unmap_out: dma_unmap_sg(&ioc->pcidev->dev, job->request_payload.sg_list, 1, - PCI_DMA_BIDIRECTIONAL); + DMA_BIDIRECTIONAL); put_mf: if (mf) mpt_free_msg_frame(ioc, mf); @@ -2412,8 +2412,8 @@ mptsas_sas_io_unit_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info) goto out; } - buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - &dma_handle); + buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, + &dma_handle, GFP_KERNEL); if (!buffer) { error = -ENOMEM; goto out; @@ -2452,8 +2452,8 @@ mptsas_sas_io_unit_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info) } out_free_consistent: - pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - buffer, dma_handle); + dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer, + dma_handle); out: return error; } @@ -2487,8 +2487,8 @@ mptsas_sas_io_unit_pg1(MPT_ADAPTER *ioc) goto out; } - buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - &dma_handle); + buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, + &dma_handle, GFP_KERNEL); if (!buffer) { error = -ENOMEM; goto out; @@ -2509,8 +2509,8 @@ mptsas_sas_io_unit_pg1(MPT_ADAPTER *ioc) device_missing_delay & MPI_SAS_IOUNIT1_REPORT_MISSING_TIMEOUT_MASK; out_free_consistent: - pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - buffer, dma_handle); + dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer, + dma_handle); out: return error; } @@ -2551,8 +2551,8 @@ 
mptsas_sas_phy_pg0(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info, goto out; } - buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - &dma_handle); + buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, + &dma_handle, GFP_KERNEL); if (!buffer) { error = -ENOMEM; goto out; @@ -2573,8 +2573,8 @@ mptsas_sas_phy_pg0(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info, phy_info->attached.handle = le16_to_cpu(buffer->AttachedDevHandle); out_free_consistent: - pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - buffer, dma_handle); + dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer, + dma_handle); out: return error; } @@ -2614,8 +2614,8 @@ mptsas_sas_device_pg0(MPT_ADAPTER *ioc, struct mptsas_devinfo *device_info, goto out; } - buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - &dma_handle); + buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, + &dma_handle, GFP_KERNEL); if (!buffer) { error = -ENOMEM; goto out; @@ -2654,8 +2654,8 @@ mptsas_sas_device_pg0(MPT_ADAPTER *ioc, struct mptsas_devinfo *device_info, device_info->flags = le16_to_cpu(buffer->Flags); out_free_consistent: - pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - buffer, dma_handle); + dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer, + dma_handle); out: return error; } @@ -2697,8 +2697,8 @@ mptsas_sas_expander_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info, goto out; } - buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - &dma_handle); + buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, + &dma_handle, GFP_KERNEL); if (!buffer) { error = -ENOMEM; goto out; @@ -2737,8 +2737,8 @@ mptsas_sas_expander_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info, } out_free_consistent: - pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - buffer, dma_handle); + dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer, + dma_handle); out: return error; } @@ -2777,8 +2777,8 @@ mptsas_sas_expander_pg1(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info, goto out; } - buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - &dma_handle); + buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, + &dma_handle, GFP_KERNEL); if (!buffer) { error = -ENOMEM; goto out; @@ -2810,8 +2810,8 @@ mptsas_sas_expander_pg1(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info, phy_info->attached.handle = le16_to_cpu(buffer->AttachedDevHandle); out_free_consistent: - pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4, - buffer, dma_handle); + dma_free_coherent(&ioc->pcidev->dev, hdr.ExtPageLength * 4, buffer, + dma_handle); out: return error; } @@ -2896,7 +2896,8 @@ mptsas_exp_repmanufacture_info(MPT_ADAPTER *ioc, sz = sizeof(struct rep_manu_request) + sizeof(struct rep_manu_reply); - data_out = pci_alloc_consistent(ioc->pcidev, sz, &data_out_dma); + data_out = dma_alloc_coherent(&ioc->pcidev->dev, sz, &data_out_dma, + GFP_KERNEL); if (!data_out) { printk(KERN_ERR "Memory allocation failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); @@ -2987,7 +2988,8 @@ mptsas_exp_repmanufacture_info(MPT_ADAPTER *ioc, } out_free: if (data_out_dma) - pci_free_consistent(ioc->pcidev, sz, data_out, data_out_dma); + dma_free_coherent(&ioc->pcidev->dev, sz, data_out, + data_out_dma); put_mf: if (mf) mpt_free_msg_frame(ioc, mf); @@ -4271,8 +4273,8 @@ mptsas_adding_inactive_raid_components(MPT_ADAPTER *ioc, u8 channel, u8 id) if (!hdr.PageLength) goto out; - buffer = 
pci_alloc_consistent(ioc->pcidev, hdr.PageLength * 4, - &dma_handle); + buffer = dma_alloc_coherent(&ioc->pcidev->dev, hdr.PageLength * 4, + &dma_handle, GFP_KERNEL); if (!buffer) goto out; @@ -4318,8 +4320,8 @@ mptsas_adding_inactive_raid_components(MPT_ADAPTER *ioc, u8 channel, u8 id) out: if (buffer) - pci_free_consistent(ioc->pcidev, hdr.PageLength * 4, buffer, - dma_handle); + dma_free_coherent(&ioc->pcidev->dev, hdr.PageLength * 4, + buffer, dma_handle); } /* * Work queue thread to handle SAS hotplug events diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c index c3305bdda69cda77f8553b4898e861e9b2c001d8..bee727ed98db5734af2effa82ef47ce8da9546e8 100644 --- a/drivers/misc/eeprom/at25.c +++ b/drivers/misc/eeprom/at25.c @@ -440,6 +440,10 @@ static int at25_probe(struct spi_device *spi) return -ENXIO; } + at25 = devm_kzalloc(&spi->dev, sizeof(*at25), GFP_KERNEL); + if (!at25) + return -ENOMEM; + mutex_init(&at25->lock); at25->spi = spi; spi_set_drvdata(spi, at25); diff --git a/drivers/misc/eeprom/ee1004.c b/drivers/misc/eeprom/ee1004.c index bb9c4512c968c8b3435b65cc46b713f3c75e216b..9fbfe784d71015aeaee46adaa57c2ecfaf96e077 100644 --- a/drivers/misc/eeprom/ee1004.c +++ b/drivers/misc/eeprom/ee1004.c @@ -114,6 +114,9 @@ static ssize_t ee1004_eeprom_read(struct i2c_client *client, char *buf, if (offset + count > EE1004_PAGE_SIZE) count = EE1004_PAGE_SIZE - offset; + if (count > I2C_SMBUS_BLOCK_MAX) + count = I2C_SMBUS_BLOCK_MAX; + return i2c_smbus_read_i2c_block_data_or_emulated(client, offset, count, buf); } diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 4ccbf43e6bfa942bc40075760841dc1ddef7a447..aa1682b94a23b9b04c91bad34f3367732176d3e5 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -1288,7 +1288,14 @@ static int fastrpc_dmabuf_alloc(struct fastrpc_user *fl, char __user *argp) } if (copy_to_user(argp, &bp, sizeof(bp))) { - dma_buf_put(buf->dmabuf); + /* + * The usercopy failed, but we can't do much about it, as + * dma_buf_fd() already called fd_install() and made the + * file descriptor accessible for the current process. It + * might already be closed and dmabuf no longer valid when + * we reach this point. Therefore "leak" the fd and rely on + * the process exit path to do any required cleanup. + */ return -EFAULT; } diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 45f5787939805dd75d4ba84a944368f68632e5ec..bd87012c220c2b04d2b2faee9449f76cef2f13de 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -67,7 +67,7 @@ static const unsigned int sd_au_size[] = { __res & __mask; \ }) -#define SD_POWEROFF_NOTIFY_TIMEOUT_MS 2000 +#define SD_POWEROFF_NOTIFY_TIMEOUT_MS 1000 #define SD_WRITE_EXTR_SINGLE_TIMEOUT_MS 1000 struct sd_busy_data { @@ -1664,6 +1664,12 @@ static int sd_poweroff_notify(struct mmc_card *card) goto out; } + /* Find out when the command is completed. 
*/ + err = mmc_poll_for_busy(card, SD_WRITE_EXTR_SINGLE_TIMEOUT_MS, false, + MMC_BUSY_EXTR_SINGLE); + if (err) + goto out; + cb_data.card = card; cb_data.reg_buf = reg_buf; err = __mmc_poll_for_busy(card->host, SD_POWEROFF_NOTIFY_TIMEOUT_MS, diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c index 16d1c7a43d331938e2c8fb0513eede9129776a69..b6eb75f4bbfc6d1be06dc9556d7e1cf02ac88bb6 100644 --- a/drivers/mmc/host/moxart-mmc.c +++ b/drivers/mmc/host/moxart-mmc.c @@ -705,12 +705,12 @@ static int moxart_remove(struct platform_device *pdev) if (!IS_ERR_OR_NULL(host->dma_chan_rx)) dma_release_channel(host->dma_chan_rx); mmc_remove_host(mmc); - mmc_free_host(mmc); writel(0, host->base + REG_INTERRUPT_MASK); writel(0, host->base + REG_POWER_CONTROL); writel(readl(host->base + REG_CLOCK_CONTROL) | CLK_OFF, host->base + REG_CLOCK_CONTROL); + mmc_free_host(mmc); return 0; } diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 084c61b2cbecec6de54c8d55aebcdcabee3b72f5..2797a9c0f17d86f3ee774a45d02d3566f4ed723e 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -642,7 +642,7 @@ static int renesas_sdhi_select_tuning(struct tmio_mmc_host *host) * is at least SH_MOBILE_SDHI_MIN_TAP_ROW probes long then use the * center index as the tap, otherwise bail out. */ - bitmap_for_each_set_region(bitmap, rs, re, 0, taps_size) { + for_each_set_bitrange(rs, re, bitmap, taps_size) { if (re - rs > tap_cnt) { tap_end = re; tap_start = rs; diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index a593b1fbd69e5523f64fb5ebbcc9487d7a14ad3f..0f3658b36513c681ae2e2bf8d71c551fbe38bc9e 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -524,12 +524,16 @@ static void esdhc_of_adma_workaround(struct sdhci_host *host, u32 intmask) static int esdhc_of_enable_dma(struct sdhci_host *host) { + int ret; u32 value; struct device *dev = mmc_dev(host->mmc); if (of_device_is_compatible(dev->of_node, "fsl,ls1043a-esdhc") || - of_device_is_compatible(dev->of_node, "fsl,ls1046a-esdhc")) - dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); + of_device_is_compatible(dev->of_node, "fsl,ls1046a-esdhc")) { + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); + if (ret) + return ret; + } value = sdhci_readl(host, ESDHC_DMA_SYSCTL); diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c index bcc595c70a9fbcc33c048dd5e7b11c8da908685e..104dcd702870c423b9b5f2a7fb1c20e186adb580 100644 --- a/drivers/mmc/host/sh_mmcif.c +++ b/drivers/mmc/host/sh_mmcif.c @@ -405,6 +405,9 @@ static int sh_mmcif_dma_slave_config(struct sh_mmcif_host *host, struct dma_slave_config cfg = { 0, }; res = platform_get_resource(host->pd, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; + cfg.direction = direction; if (direction == DMA_DEV_TO_MEM) { diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 6006c2e8fa2bcd46ca74f89277aab77867727ff6..9fd1d6cba3cdaa22df34a2a4e2cb0e85863f853b 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1021,8 +1021,8 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) if (port->aggregator && port->aggregator->is_active && !__port_is_enabled(port)) { - __enable_port(port); + *update_slave_arr = true; } } break; @@ -1779,6 +1779,7 @@ static void ad_agg_selection_logic(struct aggregator *agg, port = port->next_port_in_aggregator) { __enable_port(port); } + *update_slave_arr = 
true; } } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ec498ce70f35a67a8b4c93bcae28312c7be6bf33..238b56d77c369d9595d55bc681c2191c49dd2905 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4133,9 +4133,7 @@ static int bond_eth_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cm fallthrough; case SIOCGHWTSTAMP: - rcu_read_lock(); real_dev = bond_option_active_slave_get_rcu(bond); - rcu_read_unlock(); if (!real_dev) return -EOPNOTSUPP; @@ -5382,9 +5380,7 @@ static int bond_ethtool_get_ts_info(struct net_device *bond_dev, struct net_device *real_dev; struct phy_device *phydev; - rcu_read_lock(); real_dev = bond_option_active_slave_get_rcu(bond); - rcu_read_unlock(); if (real_dev) { ops = real_dev->ethtool_ops; phydev = real_dev->phydev; diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c index 2ec11af5f0cce2b4ac26efd39d01767efeb5796b..46b150e6289ef4607c8ddbcd2b833ff4dd64cc9b 100644 --- a/drivers/net/bonding/bond_procfs.c +++ b/drivers/net/bonding/bond_procfs.c @@ -11,7 +11,7 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { - struct bonding *bond = PDE_DATA(file_inode(seq->file)); + struct bonding *bond = pde_data(file_inode(seq->file)); struct list_head *iter; struct slave *slave; loff_t off = 0; @@ -30,7 +30,7 @@ static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct bonding *bond = PDE_DATA(file_inode(seq->file)); + struct bonding *bond = pde_data(file_inode(seq->file)); struct list_head *iter; struct slave *slave; bool found = false; @@ -57,7 +57,7 @@ static void bond_info_seq_stop(struct seq_file *seq, void *v) static void bond_info_show_master(struct seq_file *seq) { - struct bonding *bond = PDE_DATA(file_inode(seq->file)); + struct bonding *bond = pde_data(file_inode(seq->file)); const struct bond_opt_value *optval; struct slave *curr, *primary; int i; @@ -175,7 +175,7 @@ static void bond_info_show_master(struct seq_file *seq) static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave) { - struct bonding *bond = PDE_DATA(file_inode(seq->file)); + struct bonding *bond = pde_data(file_inode(seq->file)); seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name); seq_printf(seq, "MII Status: %s\n", bond_slave_link_status(slave->link)); diff --git a/drivers/net/can/flexcan/flexcan-core.c b/drivers/net/can/flexcan/flexcan-core.c index 0bff1884d5cc6615057399c63395274798933033..74d7fcbfd065b14c9d578268b1db4bc84594b542 100644 --- a/drivers/net/can/flexcan/flexcan-core.c +++ b/drivers/net/can/flexcan/flexcan-core.c @@ -296,6 +296,7 @@ static_assert(sizeof(struct flexcan_regs) == 0x4 * 18 + 0xfb8); static const struct flexcan_devtype_data fsl_mcf5441x_devtype_data = { .quirks = FLEXCAN_QUIRK_BROKEN_PERR_STATE | FLEXCAN_QUIRK_NR_IRQ_3 | FLEXCAN_QUIRK_NR_MB_16 | + FLEXCAN_QUIRK_SUPPPORT_RX_MAILBOX | FLEXCAN_QUIRK_SUPPPORT_RX_FIFO, }; diff --git a/drivers/net/can/flexcan/flexcan.h b/drivers/net/can/flexcan/flexcan.h index fccdff8b1f0f10fa3264c7b42b23ee28d25e1110..23fc09a7e10f330ecabd1412b6c96033ae23f274 100644 --- a/drivers/net/can/flexcan/flexcan.h +++ b/drivers/net/can/flexcan/flexcan.h @@ -21,7 +21,7 @@ * Below is some version info we got: * SOC Version IP-Version Glitch- [TR]WRN_INT IRQ Err Memory err RTR rece- FD Mode MB * Filter? connected? Passive detection ption in MB Supported? - * MCF5441X FlexCAN2 ? 
no yes no no yes no 16 + * MCF5441X FlexCAN2 ? no yes no no no no 16 * MX25 FlexCAN2 03.00.00.00 no no no no no no 64 * MX28 FlexCAN2 03.00.04.00 yes yes no no no no 64 * MX35 FlexCAN2 03.00.00.00 no no no no no no 64 diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 5b47cd8677837e405f8a5bc5adb3823baa133062..1a4b56f6fa8c6a0383f463091097579f881c902d 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -336,6 +336,9 @@ m_can_fifo_read(struct m_can_classdev *cdev, u32 addr_offset = cdev->mcfg[MRAM_RXF0].off + fgi * RXF0_ELEMENT_SIZE + offset; + if (val_count == 0) + return 0; + return cdev->ops->read_fifo(cdev, addr_offset, val, val_count); } @@ -346,6 +349,9 @@ m_can_fifo_write(struct m_can_classdev *cdev, u32 addr_offset = cdev->mcfg[MRAM_TXB].off + fpi * TXB_ELEMENT_SIZE + offset; + if (val_count == 0) + return 0; + return cdev->ops->write_fifo(cdev, addr_offset, val, val_count); } diff --git a/drivers/net/can/m_can/tcan4x5x-regmap.c b/drivers/net/can/m_can/tcan4x5x-regmap.c index ca80dbaf7a3f5415050fd143125a00ee4aa35397..26e212b8ca7a6af86ba4467f2b54d16f17db112b 100644 --- a/drivers/net/can/m_can/tcan4x5x-regmap.c +++ b/drivers/net/can/m_can/tcan4x5x-regmap.c @@ -12,7 +12,7 @@ #define TCAN4X5X_SPI_INSTRUCTION_WRITE (0x61 << 24) #define TCAN4X5X_SPI_INSTRUCTION_READ (0x41 << 24) -#define TCAN4X5X_MAX_REGISTER 0x8ffc +#define TCAN4X5X_MAX_REGISTER 0x87fc static int tcan4x5x_regmap_gather_write(void *context, const void *reg, size_t reg_len, diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 7b1457a6e327b01d5415c71dcd06ed4116234706..c0c91440340ae67d0e313a012b8d5c70d2ec9a3e 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -36,6 +36,7 @@ config NET_DSA_LANTIQ_GSWIP config NET_DSA_MT7530 tristate "MediaTek MT753x and MT7621 Ethernet switch support" select NET_DSA_TAG_MTK + select MEDIATEK_GE_PHY help This enables support for the MediaTek MT7530, MT7531, and MT7621 Ethernet switch chips. 
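The val_count == 0 guards added above to m_can_fifo_read()/m_can_fifo_write() matter because the tcan4x5x back-end implements these callbacks with regmap bulk transfers, and regmap_bulk_read()/regmap_bulk_write() reject a zero-length transfer with -EINVAL, while a CAN frame with no data payload (DLC 0) legitimately asks the FIFO helpers for zero words. A minimal, self-contained user-space sketch of the pattern follows; bulk_read() and fifo_read() are hypothetical stand-ins for the backend callback and the guarded helper, not kernel APIs:

	#include <errno.h>
	#include <stddef.h>
	#include <stdio.h>

	/* Hypothetical stand-in for a regmap-style backend: like
	 * regmap_bulk_read(), it rejects an empty transfer outright. */
	static int bulk_read(unsigned int addr, unsigned int *val, size_t val_count)
	{
		if (!val_count)
			return -EINVAL;
		for (size_t i = 0; i < val_count; i++)
			val[i] = addr + i;	/* fake register contents */
		return 0;
	}

	/* Guarded FIFO helper, mirroring the pattern the patch adds: a
	 * zero-length request is a successful no-op instead of an error
	 * bubbling up from the backend. */
	static int fifo_read(unsigned int addr, unsigned int *val, size_t val_count)
	{
		if (!val_count)
			return 0;
		return bulk_read(addr, val, val_count);
	}

	int main(void)
	{
		unsigned int buf[4];

		printf("zero-length read: %d (successful no-op)\n",
		       fifo_read(0x100, buf, 0));
		printf("four-word read:   %d\n", fifo_read(0x100, buf, 4));
		return 0;
	}

Returning 0 instead of propagating the backend's -EINVAL keeps a frame with an empty payload from being misreported as an I/O failure.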
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 33499fcd88484eb77031eebcf660b08232f8236b..6afb5db8244cd1650426a646988f3feb3b07c3d4 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -621,7 +621,7 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) get_device(&priv->master_mii_bus->dev); priv->master_mii_dn = dn; - priv->slave_mii_bus = devm_mdiobus_alloc(ds->dev); + priv->slave_mii_bus = mdiobus_alloc(); if (!priv->slave_mii_bus) { of_node_put(dn); return -ENOMEM; @@ -681,8 +681,10 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) } err = mdiobus_register(priv->slave_mii_bus); - if (err && dn) + if (err && dn) { + mdiobus_free(priv->slave_mii_bus); of_node_put(dn); + } return err; } @@ -690,6 +692,7 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv) { mdiobus_unregister(priv->slave_mii_bus); + mdiobus_free(priv->slave_mii_bus); of_node_put(priv->master_mii_dn); } diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 46ed953e787e15cab14b1df78da5c24625252f1f..320ee7fe91a8cfb778151d924dae7d152ec12f34 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -498,8 +498,9 @@ static int gswip_mdio_rd(struct mii_bus *bus, int addr, int reg) static int gswip_mdio(struct gswip_priv *priv, struct device_node *mdio_np) { struct dsa_switch *ds = priv->ds; + int err; - ds->slave_mii_bus = devm_mdiobus_alloc(priv->dev); + ds->slave_mii_bus = mdiobus_alloc(); if (!ds->slave_mii_bus) return -ENOMEM; @@ -512,7 +513,11 @@ static int gswip_mdio(struct gswip_priv *priv, struct device_node *mdio_np) ds->slave_mii_bus->parent = priv->dev; ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask; - return of_mdiobus_register(ds->slave_mii_bus, mdio_np); + err = of_mdiobus_register(ds->slave_mii_bus, mdio_np); + if (err) + mdiobus_free(ds->slave_mii_bus); + + return err; } static int gswip_pce_table_entry_read(struct gswip_priv *priv, @@ -2145,8 +2150,10 @@ disable_switch: gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB); dsa_unregister_switch(priv->ds); mdio_bus: - if (mdio_np) + if (mdio_np) { mdiobus_unregister(priv->ds->slave_mii_bus); + mdiobus_free(priv->ds->slave_mii_bus); + } put_mdio_node: of_node_put(mdio_np); for (i = 0; i < priv->num_gphy_fw; i++) @@ -2169,6 +2176,7 @@ static int gswip_remove(struct platform_device *pdev) if (priv->ds->slave_mii_bus) { mdiobus_unregister(priv->ds->slave_mii_bus); + mdiobus_free(priv->ds->slave_mii_bus); of_node_put(priv->ds->slave_mii_bus->dev.of_node); } diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index b82512e5b33b081b5233f4a3e02f5971bdbed342..ff3c267d0f268cbc46658763a75802eb8645c2ff 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -2074,7 +2074,7 @@ mt7530_setup_mdio(struct mt7530_priv *priv) if (priv->irq) mt7530_setup_mdio_irq(priv); - ret = mdiobus_register(bus); + ret = devm_mdiobus_register(dev, bus); if (ret) { dev_err(dev, "failed to register MDIO bus: %d\n", ret); if (priv->irq) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 58ca684d73f742d7d8e62c5e9e426835ee151ba9..8530dbe403f4dc6c970d2c19bbf104aec6dc7419 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3399,7 +3399,7 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, return err; } - bus = devm_mdiobus_alloc_size(chip->dev, sizeof(*mdio_bus)); + bus = 
mdiobus_alloc_size(sizeof(*mdio_bus)); if (!bus) return -ENOMEM; @@ -3424,14 +3424,14 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, if (!external) { err = mv88e6xxx_g2_irq_mdio_setup(chip, bus); if (err) - return err; + goto out; } err = of_mdiobus_register(bus, np); if (err) { dev_err(chip->dev, "Cannot register MDIO bus (%d)\n", err); mv88e6xxx_g2_irq_mdio_free(chip, bus); - return err; + goto out; } if (external) @@ -3440,21 +3440,26 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, list_add(&mdio_bus->list, &chip->mdios); return 0; + +out: + mdiobus_free(bus); + return err; } static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip) { - struct mv88e6xxx_mdio_bus *mdio_bus; + struct mv88e6xxx_mdio_bus *mdio_bus, *p; struct mii_bus *bus; - list_for_each_entry(mdio_bus, &chip->mdios, list) { + list_for_each_entry_safe(mdio_bus, p, &chip->mdios, list) { bus = mdio_bus->bus; if (!mdio_bus->external) mv88e6xxx_g2_irq_mdio_free(chip, bus); mdiobus_unregister(bus); + mdiobus_free(bus); } } diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index bf8d38239e7e81eb675e040a4a87acfabdd6c774..33f0ceae381d43dc42bb8a3e9bed008c8d38e883 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1061,7 +1061,7 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot) return PTR_ERR(hw); } - bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv)); + bus = mdiobus_alloc_size(sizeof(*mdio_priv)); if (!bus) return -ENOMEM; @@ -1081,6 +1081,7 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot) rc = mdiobus_register(bus); if (rc < 0) { dev_err(dev, "failed to register MDIO bus\n"); + mdiobus_free(bus); return rc; } @@ -1132,6 +1133,7 @@ static void vsc9959_mdio_bus_free(struct ocelot *ocelot) lynx_pcs_destroy(phylink_pcs); } mdiobus_unregister(felix->imdio); + mdiobus_free(felix->imdio); } static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port, diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index 8c1c9da61602ff45ae5bc4ca457ac11b85587a4a..f2f1608a476c87d9318f01ccdd6dbcd04bafa5d0 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -1029,7 +1029,7 @@ static int vsc9953_mdio_bus_alloc(struct ocelot *ocelot) } /* Needed in order to initialize the bus mutex lock */ - rc = of_mdiobus_register(bus, NULL); + rc = devm_of_mdiobus_register(dev, bus, NULL); if (rc < 0) { dev_err(dev, "failed to register MDIO bus\n"); return rc; @@ -1083,7 +1083,8 @@ static void vsc9953_mdio_bus_free(struct ocelot *ocelot) mdio_device_free(mdio_device); lynx_pcs_destroy(phylink_pcs); } - mdiobus_unregister(felix->imdio); + + /* mdiobus_unregister and mdiobus_free handled by devres */ } static const struct felix_info seville_info_vsc9953 = { diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index da0d7e68643a9d21c35683f2a25449b9f87a7d42..c39de2a4c1fe04abe82c55f620fcedac0526fb77 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -378,7 +378,7 @@ static int ar9331_sw_mbus_init(struct ar9331_sw_priv *priv) if (!mnp) return -ENODEV; - ret = of_mdiobus_register(mbus, mnp); + ret = devm_of_mdiobus_register(dev, mbus, mnp); of_node_put(mnp); if (ret) return ret; @@ -1091,7 +1091,6 @@ static void ar9331_sw_remove(struct mdio_device *mdiodev) } irq_domain_remove(priv->irqdomain); - mdiobus_unregister(priv->mbus); dsa_unregister_switch(&priv->ds); 
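	/* The MDIO bus is registered via devm_of_mdiobus_register() in
	 * ar9331_sw_mbus_init() now, so devres unregisters and frees it
	 * automatically on driver detach; the explicit
	 * mdiobus_unregister() removed above would have been a double
	 * unregister.
	 */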
reset_control_assert(priv->sw_reset); diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index 481f1df3106c462a5551103d380dd45583665f52..8aec5d9fbfef2803c181387537300502a937caf0 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -2278,6 +2278,7 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) struct net_device *dev; struct typhoon *tp; int card_id = (int) ent->driver_data; + u8 addr[ETH_ALEN] __aligned(4); void __iomem *ioaddr; void *shared; dma_addr_t shared_dma; @@ -2409,8 +2410,9 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto error_out_reset; } - *(__be16 *)&dev->dev_addr[0] = htons(le16_to_cpu(xp_resp[0].parm1)); - *(__be32 *)&dev->dev_addr[2] = htonl(le32_to_cpu(xp_resp[0].parm2)); + *(__be16 *)&addr[0] = htons(le16_to_cpu(xp_resp[0].parm1)); + *(__be32 *)&addr[2] = htonl(le32_to_cpu(xp_resp[0].parm2)); + eth_hw_addr_set(dev, addr); if (!is_valid_ether_addr(dev->dev_addr)) { err_msg = "Could not obtain valid ethernet address, aborting"; diff --git a/drivers/net/ethernet/8390/etherh.c b/drivers/net/ethernet/8390/etherh.c index bd22a534b1c02ce5a419fc0154c2ab990bd944ca..e7b879123bb19d4f3e69a4ff7c22bab411b6517a 100644 --- a/drivers/net/ethernet/8390/etherh.c +++ b/drivers/net/ethernet/8390/etherh.c @@ -655,6 +655,7 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id) struct ei_device *ei_local; struct net_device *dev; struct etherh_priv *eh; + u8 addr[ETH_ALEN]; int ret; ret = ecard_request_resources(ec); @@ -724,12 +725,13 @@ etherh_probe(struct expansion_card *ec, const struct ecard_id *id) spin_lock_init(&ei_local->page_lock); if (ec->cid.product == PROD_ANT_ETHERM) { - etherm_addr(dev->dev_addr); + etherm_addr(addr); ei_local->reg_offset = etherm_regoffsets; } else { - etherh_addr(dev->dev_addr, ec); + etherh_addr(addr, ec); ei_local->reg_offset = etherh_regoffsets; } + eth_hw_addr_set(dev, addr); ei_local->name = dev->name; ei_local->word16 = 1; diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c index 493b0cefcc2ae72e3d23a3adc22e08f1992843f5..ec8df05e7bf6138c9b42431ec70c5811f1b6c0c3 100644 --- a/drivers/net/ethernet/amd/declance.c +++ b/drivers/net/ethernet/amd/declance.c @@ -1032,6 +1032,7 @@ static int dec_lance_probe(struct device *bdev, const int type) int i, ret; unsigned long esar_base; unsigned char *esar; + u8 addr[ETH_ALEN]; const char *desc; if (dec_lance_debug && version_printed++ == 0) @@ -1228,7 +1229,8 @@ static int dec_lance_probe(struct device *bdev, const int type) break; } for (i = 0; i < 6; i++) - dev->dev_addr[i] = esar[i * 4]; + addr[i] = esar[i * 4]; + eth_hw_addr_set(dev, addr); printk("%s: %s, addr = %pM, irq = %d\n", name, desc, dev->dev_addr, dev->irq); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 492ac383f16df068f38a8c8776f8681eb11edd36..a3593290886f8cc61a0cc7c0f22da9be59402fe9 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -721,7 +721,9 @@ static void xgbe_stop_timers(struct xgbe_prv_data *pdata) if (!channel->tx_ring) break; + /* Deactivate the Tx timer */ del_timer_sync(&channel->tx_timer); + channel->tx_timer_active = 0; } } @@ -2550,6 +2552,14 @@ read_again: buf2_len = xgbe_rx_buf2_len(rdata, packet, len); len += buf2_len; + if (buf2_len > rdata->rx.buf.dma_len) { + /* Hardware inconsistency within the descriptors + * that has resulted in a length 
underflow. + */ + error = 1; + goto skip_data; + } + if (!skb) { skb = xgbe_create_skb(pdata, napi, rdata, buf1_len); @@ -2579,8 +2589,10 @@ skip_data: if (!last || context_next) goto read_again; - if (!skb) + if (!skb || error) { + dev_kfree_skb(skb); goto next_packet; + } /* Be sure we don't exceed the configured MTU */ max_len = netdev->mtu + ETH_HLEN; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index efdcf484a510f139dbd2d8ce5a9f171aef2ad076..2af3da4b2d05355717e42ee89b21dff9047f04e8 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -425,6 +425,9 @@ static void xgbe_pci_remove(struct pci_dev *pdev) pci_free_irq_vectors(pdata->pcidev); + /* Disable all interrupts in the hardware */ + XP_IOWRITE(pdata, XP_INT_EN, 0x0); + xgbe_free_pdata(pdata); } diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c index 1bc4d33a0ce5410e78167df88363254d03c5e78d..30a573db02bb638cfb17928ae546ffd09f16f948 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c @@ -826,7 +826,6 @@ int aq_filters_vlans_update(struct aq_nic_s *aq_nic) struct aq_hw_s *aq_hw = aq_nic->aq_hw; int hweight = 0; int err = 0; - int i; if (unlikely(!aq_hw_ops->hw_filter_vlan_set)) return -EOPNOTSUPP; @@ -837,8 +836,7 @@ int aq_filters_vlans_update(struct aq_nic_s *aq_nic) aq_nic->aq_hw_rx_fltrs.fl2.aq_vlans); if (aq_nic->ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) { - for (i = 0; i < BITS_TO_LONGS(VLAN_N_VID); i++) - hweight += hweight_long(aq_nic->active_vlans[i]); + hweight = bitmap_weight(aq_nic->active_vlans, VLAN_N_VID); err = aq_hw_ops->hw_filter_vlan_ctrl(aq_hw, false); if (err) @@ -871,7 +869,7 @@ int aq_filters_vlan_offload_off(struct aq_nic_s *aq_nic) struct aq_hw_s *aq_hw = aq_nic->aq_hw; int err = 0; - memset(aq_nic->active_vlans, 0, sizeof(aq_nic->active_vlans)); + bitmap_zero(aq_nic->active_vlans, VLAN_N_VID); aq_fvlan_rebuild(aq_nic, aq_nic->active_vlans, aq_nic->aq_hw_rx_fltrs.fl2.aq_vlans); diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c index f38f40eb966e3cd70dbdafc0caa458e75651fe90..a1a38456c9a33aad0eb1afbe228dbe11d56ca1da 100644 --- a/drivers/net/ethernet/broadcom/sb1250-mac.c +++ b/drivers/net/ethernet/broadcom/sb1250-mac.c @@ -2183,9 +2183,7 @@ static int sbmac_init(struct platform_device *pldev, long long base) ea_reg >>= 8; } - for (i = 0; i < 6; i++) { - dev->dev_addr[i] = eaddr[i]; - } + eth_hw_addr_set(dev, eaddr); /* * Initialize context (get pointers to registers and stuff), then diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index a363da928e8b37dbfc134d6b96d4cf521dc0f008..98498a76ae162e1c15551098ade064fb64b8efce 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -4712,7 +4712,7 @@ static int macb_probe(struct platform_device *pdev) #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) { - dma_set_mask(&pdev->dev, DMA_BIT_MASK(44)); + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44)); bp->hw_dma_cap |= HW_DMA_CAP_64B; } #endif diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index e985ae008a979585c3c0776a085b0788cd79a177..dd9385d15f6bdf1ed8382570bd970fb03e9bc484 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c 
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -4523,12 +4523,12 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) #ifdef CONFIG_DEBUG_FS dpaa2_dbg_remove(priv); #endif + + unregister_netdev(net_dev); rtnl_lock(); dpaa2_eth_disconnect_mac(priv); rtnl_unlock(); - unregister_netdev(net_dev); - dpaa2_eth_dl_port_del(priv); dpaa2_eth_dl_traps_unregister(priv); dpaa2_eth_dl_free(priv); diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c index bbbde9f701c27a7ca82268f8259f3e81b36c9704..be0bd4b44926c54f6839cfc18eab72e868519ba6 100644 --- a/drivers/net/ethernet/freescale/fec_mpc52xx.c +++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c @@ -99,13 +99,13 @@ static void mpc52xx_fec_tx_timeout(struct net_device *dev, unsigned int txqueue) netif_wake_queue(dev); } -static void mpc52xx_fec_set_paddr(struct net_device *dev, u8 *mac) +static void mpc52xx_fec_set_paddr(struct net_device *dev, const u8 *mac) { struct mpc52xx_fec_priv *priv = netdev_priv(dev); struct mpc52xx_fec __iomem *fec = priv->fec; - out_be32(&fec->paddr1, *(u32 *)(&mac[0])); - out_be32(&fec->paddr2, (*(u16 *)(&mac[4]) << 16) | FEC_PADDR2_TYPE); + out_be32(&fec->paddr1, *(const u32 *)(&mac[0])); + out_be32(&fec->paddr2, (*(const u16 *)(&mac[4]) << 16) | FEC_PADDR2_TYPE); } static int mpc52xx_fec_set_mac_address(struct net_device *dev, void *addr) @@ -893,13 +893,15 @@ static int mpc52xx_fec_probe(struct platform_device *op) rv = of_get_ethdev_address(np, ndev); if (rv) { struct mpc52xx_fec __iomem *fec = priv->fec; + u8 addr[ETH_ALEN] __aligned(4); /* * If the MAC addresse is not provided via DT then read * it back from the controller regs */ - *(u32 *)(&ndev->dev_addr[0]) = in_be32(&fec->paddr1); - *(u16 *)(&ndev->dev_addr[4]) = in_be32(&fec->paddr2) >> 16; + *(u32 *)(&addr[0]) = in_be32(&fec->paddr1); + *(u16 *)(&addr[4]) = in_be32(&fec->paddr2) >> 16; + eth_hw_addr_set(ndev, addr); } /* diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 5f5d4f7aa8135019002fb9172a5d512cfc5005de..160735484465a3f78d48eb5d0ac75f37ec4c000a 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -843,7 +843,7 @@ static inline bool gve_is_gqi(struct gve_priv *priv) /* buffers */ int gve_alloc_page(struct gve_priv *priv, struct device *dev, struct page **page, dma_addr_t *dma, - enum dma_data_direction); + enum dma_data_direction, gfp_t gfp_flags); void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, enum dma_data_direction); /* tx handling */ diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 2ad7f57f7e5b52780493d647d384684a65169df0..f7621ab672b9bce6ecca27b9b6b4a18f3d547bc7 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -301,7 +301,7 @@ static int gve_adminq_parse_err(struct gve_priv *priv, u32 status) */ static int gve_adminq_kick_and_wait(struct gve_priv *priv) { - u32 tail, head; + int tail, head; int i; tail = ioread32be(&priv->reg_bar0->adminq_event_counter); diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index f7f65c4bf9939a4e1d235084876b8aac5d6222c0..54e51c8221b8cab1545a7cb50e93543398dbb8d6 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -766,9 +766,9 @@ static void gve_free_rings(struct gve_priv *priv) int gve_alloc_page(struct 
gve_priv *priv, struct device *dev, struct page **page, dma_addr_t *dma, - enum dma_data_direction dir) + enum dma_data_direction dir, gfp_t gfp_flags) { - *page = alloc_page(GFP_KERNEL); + *page = alloc_page(gfp_flags); if (!*page) { priv->page_alloc_fail++; return -ENOMEM; @@ -811,7 +811,7 @@ static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id, for (i = 0; i < pages; i++) { err = gve_alloc_page(priv, &priv->pdev->dev, &qpl->pages[i], &qpl->page_buses[i], - gve_qpl_dma_dir(priv, id)); + gve_qpl_dma_dir(priv, id), GFP_KERNEL); /* caller handles clean up */ if (err) return -ENOMEM; diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index 9ddcc497f48e616180fd9676e052493c46ca2244..e4e98aa7745f3e57851f80330eeb1b5fa1da0b40 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -86,7 +86,8 @@ static int gve_rx_alloc_buffer(struct gve_priv *priv, struct device *dev, dma_addr_t dma; int err; - err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE); + err = gve_alloc_page(priv, dev, &page, &dma, DMA_FROM_DEVICE, + GFP_ATOMIC); if (err) return err; @@ -608,6 +609,7 @@ static bool gve_rx(struct gve_rx_ring *rx, netdev_features_t feat, *packet_size_bytes = skb->len + (skb->protocol ? ETH_HLEN : 0); *work_done = work_cnt; + skb_record_rx_queue(skb, rx->q_num); if (skb_is_nonlinear(skb)) napi_gro_frags(napi); else diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index beb8bb079023cc555a2eeab4d73b1b62d261ad1f..8c939628e2d85dadfafdf2acb55a43f69763ed90 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -157,7 +157,7 @@ static int gve_alloc_page_dqo(struct gve_priv *priv, int err; err = gve_alloc_page(priv, &priv->pdev->dev, &buf_state->page_info.page, - &buf_state->addr, DMA_FROM_DEVICE); + &buf_state->addr, DMA_FROM_DEVICE, GFP_KERNEL); if (err) return err; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 7df87610ad962b23a4c1b69c034a904493b7503b..21442a9bb9961385751aedc9e6aae077bb8b37c8 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2043,8 +2043,7 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) break; } - if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER) - hclgevf_enable_vector(&hdev->misc_vector, true); + hclgevf_enable_vector(&hdev->misc_vector, true); return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/i825xx/ether1.c b/drivers/net/ethernet/i825xx/ether1.c index c612ef526d16d684d3546ec2f870fb3b9f079e89..3e7d7c4bafdc2dcaa1ad0435255ba1628db42d2e 100644 --- a/drivers/net/ethernet/i825xx/ether1.c +++ b/drivers/net/ethernet/i825xx/ether1.c @@ -986,6 +986,7 @@ static int ether1_probe(struct expansion_card *ec, const struct ecard_id *id) { struct net_device *dev; + u8 addr[ETH_ALEN]; int i, ret = 0; ether1_banner(); @@ -1015,7 +1016,8 @@ ether1_probe(struct expansion_card *ec, const struct ecard_id *id) } for (i = 0; i < 6; i++) - dev->dev_addr[i] = readb(IDPROM_ADDRESS + (i << 2)); + addr[i] = readb(IDPROM_ADDRESS + (i << 2)); + eth_hw_addr_set(dev, addr); if (ether1_init_2(dev)) { ret = -ENODEV; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 59536bd5cab1bd19d437e8e696fb728b0b691b2d..29617a86b2995b882a5a74f7017d40339fdc1889 100644 --- 
a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -110,6 +110,7 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
 				       struct ibmvnic_sub_crq_queue *tx_scrq);
 static void free_long_term_buff(struct ibmvnic_adapter *adapter,
 				struct ibmvnic_long_term_buff *ltb);
+static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter);

 struct ibmvnic_stat {
 	char name[ETH_GSTRING_LEN];
@@ -1424,7 +1425,7 @@ static int __ibmvnic_open(struct net_device *netdev)
 	rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_UP);
 	if (rc) {
 		ibmvnic_napi_disable(adapter);
-		release_resources(adapter);
+		ibmvnic_disable_irqs(adapter);
 		return rc;
 	}

@@ -1474,9 +1475,6 @@ static int ibmvnic_open(struct net_device *netdev)
 		rc = init_resources(adapter);
 		if (rc) {
 			netdev_err(netdev, "failed to initialize resources\n");
-			release_resources(adapter);
-			release_rx_pools(adapter);
-			release_tx_pools(adapter);
 			goto out;
 		}
 	}
@@ -1493,6 +1491,13 @@ out:
 		adapter->state = VNIC_OPEN;
 		rc = 0;
 	}
+
+	if (rc) {
+		release_resources(adapter);
+		release_rx_pools(adapter);
+		release_tx_pools(adapter);
+	}
+
 	return rc;
 }

@@ -2602,6 +2607,7 @@ static void __ibmvnic_reset(struct work_struct *work)
 	struct ibmvnic_rwi *rwi;
 	unsigned long flags;
 	u32 reset_state;
+	int num_fails = 0;
 	int rc = 0;

 	adapter = container_of(work, struct ibmvnic_adapter, ibmvnic_reset);
@@ -2655,11 +2661,23 @@ static void __ibmvnic_reset(struct work_struct *work)
 				rc = do_hard_reset(adapter, rwi, reset_state);
 				rtnl_unlock();
 			}
-			if (rc) {
-				/* give backing device time to settle down */
+			if (rc)
+				num_fails++;
+			else
+				num_fails = 0;
+
+			/* If auto-priority-failover is enabled we can get
+			 * back-to-back failovers during resets, resulting
+			 * in at least two failed resets (from high-priority
+			 * backing device to low-priority one and then back).
+			 * If resets continue to fail beyond that, give the
+			 * adapter some time to settle down before retrying.
+			 */
+			if (num_fails >= 3) {
 				netdev_dbg(adapter->netdev,
-					   "[S:%s] Hard reset failed, waiting 60 secs\n",
-					   adapter_state_to_string(adapter->state));
+					   "[S:%s] Hard reset failed %d times, waiting 60 secs\n",
+					   adapter_state_to_string(adapter->state),
+					   num_fails);
 				set_current_state(TASK_UNINTERRUPTIBLE);
 				schedule_timeout(60 * HZ);
 			}
@@ -3844,11 +3862,25 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry)
 	struct device *dev = &adapter->vdev->dev;
 	union ibmvnic_crq crq;
 	int max_entries;
+	int cap_reqs;
+
+	/* We send out 6 or 7 REQUEST_CAPABILITY CRQs below (depending on
+	 * the PROMISC flag). Initialize this count upfront. When the tasklet
+	 * receives a response to all of these, it will send the next protocol
+	 * message (QUERY_IP_OFFLOAD).
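+	 * Each ibmvnic_send_crq() below is paired with a cap_reqs--
+	 * decrement; the WARN_ON() at the end of the function then flags
+	 * any mismatch between this initial count and the number of CRQs
+	 * actually sent.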
+	 */
+	if (!(adapter->netdev->flags & IFF_PROMISC) ||
+	    adapter->promisc_supported)
+		cap_reqs = 7;
+	else
+		cap_reqs = 6;

 	if (!retry) {
 		/* Sub-CRQ entries are 32 byte long */
 		int entries_page = 4 * PAGE_SIZE / (sizeof(u64) * 4);

+		atomic_set(&adapter->running_cap_crqs, cap_reqs);
+
 		if (adapter->min_tx_entries_per_subcrq > entries_page ||
 		    adapter->min_rx_add_entries_per_subcrq > entries_page) {
 			dev_err(dev, "Fatal, invalid entries per sub-crq\n");
@@ -3909,44 +3941,45 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry)
 			adapter->opt_rx_comp_queues;

 		adapter->req_rx_add_queues = adapter->max_rx_add_queues;
+	} else {
+		atomic_add(cap_reqs, &adapter->running_cap_crqs);
 	}
-
 	memset(&crq, 0, sizeof(crq));
 	crq.request_capability.first = IBMVNIC_CRQ_CMD;
 	crq.request_capability.cmd = REQUEST_CAPABILITY;

 	crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES);
 	crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues);
-	atomic_inc(&adapter->running_cap_crqs);
+	cap_reqs--;
 	ibmvnic_send_crq(adapter, &crq);

 	crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES);
 	crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues);
-	atomic_inc(&adapter->running_cap_crqs);
+	cap_reqs--;
 	ibmvnic_send_crq(adapter, &crq);

 	crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES);
 	crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues);
-	atomic_inc(&adapter->running_cap_crqs);
+	cap_reqs--;
 	ibmvnic_send_crq(adapter, &crq);

 	crq.request_capability.capability = cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ);
 	crq.request_capability.number = cpu_to_be64(adapter->req_tx_entries_per_subcrq);
-	atomic_inc(&adapter->running_cap_crqs);
+	cap_reqs--;
 	ibmvnic_send_crq(adapter, &crq);

 	crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ);
 	crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_entries_per_subcrq);
-	atomic_inc(&adapter->running_cap_crqs);
+	cap_reqs--;
 	ibmvnic_send_crq(adapter, &crq);

 	crq.request_capability.capability = cpu_to_be16(REQ_MTU);
 	crq.request_capability.number = cpu_to_be64(adapter->req_mtu);
-	atomic_inc(&adapter->running_cap_crqs);
+	cap_reqs--;
 	ibmvnic_send_crq(adapter, &crq);

 	if (adapter->netdev->flags & IFF_PROMISC) {
@@ -3954,16 +3987,21 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry)
 			crq.request_capability.capability =
 			    cpu_to_be16(PROMISC_REQUESTED);
 			crq.request_capability.number = cpu_to_be64(1);
-			atomic_inc(&adapter->running_cap_crqs);
+			cap_reqs--;
 			ibmvnic_send_crq(adapter, &crq);
 		}
 	} else {
 		crq.request_capability.capability =
 		    cpu_to_be16(PROMISC_REQUESTED);
 		crq.request_capability.number = cpu_to_be64(0);
-		atomic_inc(&adapter->running_cap_crqs);
+		cap_reqs--;
 		ibmvnic_send_crq(adapter, &crq);
 	}
+
+	/* Keep at end to catch any discrepancy between expected and actual
+	 * CRQs sent.
+	 */
+	WARN_ON(cap_reqs != 0);
 }

 static int pending_scrq(struct ibmvnic_adapter *adapter,
@@ -4357,118 +4395,132 @@ static void send_query_map(struct ibmvnic_adapter *adapter)
 static void send_query_cap(struct ibmvnic_adapter *adapter)
 {
 	union ibmvnic_crq crq;
+	int cap_reqs;
+
+	/* We send out 25 QUERY_CAPABILITY CRQs below. Initialize this count
+	 * upfront. When the tasklet receives a response to all of these, it
+	 * can send out the next protocol message (REQUEST_CAPABILITY).
+ */ + cap_reqs = 25; + + atomic_set(&adapter->running_cap_crqs, cap_reqs); - atomic_set(&adapter->running_cap_crqs, 0); memset(&crq, 0, sizeof(crq)); crq.query_capability.first = IBMVNIC_CRQ_CMD; crq.query_capability.cmd = QUERY_CAPABILITY; crq.query_capability.capability = cpu_to_be16(MIN_TX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(TCP_IP_OFFLOAD); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(PROMISC_SUPPORTED); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_MTU); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_MTU); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_MULTICAST_FILTERS); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(VLAN_HEADER_INSERTION); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(RX_VLAN_HEADER_INSERTION); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_SG_ENTRIES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(RX_SG_SUPPORTED); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_TX_COMP_SUB_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RX_COMP_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = 
cpu_to_be16(OPT_RX_BUFADD_Q_PER_RX_COMP_Q); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RXBA_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(TX_RX_DESC_REQ); - atomic_inc(&adapter->running_cap_crqs); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + /* Keep at end to catch any discrepancy between expected and actual + * CRQs sent. + */ + WARN_ON(cap_reqs != 0); } static void send_query_ip_offload(struct ibmvnic_adapter *adapter) @@ -4772,6 +4824,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, char *name; atomic_dec(&adapter->running_cap_crqs); + netdev_dbg(adapter->netdev, "Outstanding request-caps: %d\n", + atomic_read(&adapter->running_cap_crqs)); switch (be16_to_cpu(crq->request_capability_rsp.capability)) { case REQ_TX_QUEUES: req_value = &adapter->req_tx_queues; @@ -4835,10 +4889,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, } /* Done receiving requested capabilities, query IP offload support */ - if (atomic_read(&adapter->running_cap_crqs) == 0) { - adapter->wait_capability = false; + if (atomic_read(&adapter->running_cap_crqs) == 0) send_query_ip_offload(adapter); - } } static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, @@ -5136,10 +5188,8 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq, } out: - if (atomic_read(&adapter->running_cap_crqs) == 0) { - adapter->wait_capability = false; + if (atomic_read(&adapter->running_cap_crqs) == 0) send_request_cap(adapter, 0); - } } static int send_query_phys_parms(struct ibmvnic_adapter *adapter) @@ -5435,33 +5485,21 @@ static void ibmvnic_tasklet(struct tasklet_struct *t) struct ibmvnic_crq_queue *queue = &adapter->crq; union ibmvnic_crq *crq; unsigned long flags; - bool done = false; spin_lock_irqsave(&queue->lock, flags); - while (!done) { - /* Pull all the valid messages off the CRQ */ - while ((crq = ibmvnic_next_crq(adapter)) != NULL) { - /* This barrier makes sure ibmvnic_next_crq()'s - * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded - * before ibmvnic_handle_crq()'s - * switch(gen_crq->first) and switch(gen_crq->cmd). - */ - dma_rmb(); - ibmvnic_handle_crq(crq, adapter); - crq->generic.first = 0; - } - /* remain in tasklet until all - * capabilities responses are received + /* Pull all the valid messages off the CRQ */ + while ((crq = ibmvnic_next_crq(adapter)) != NULL) { + /* This barrier makes sure ibmvnic_next_crq()'s + * crq->generic.first & IBMVNIC_CRQ_CMD_RSP is loaded + * before ibmvnic_handle_crq()'s + * switch(gen_crq->first) and switch(gen_crq->cmd). 
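+		 * Without dma_rmb(), the loads of the command fields
+		 * could be satisfied before the load that observed
+		 * IBMVNIC_CRQ_CMD_RSP, so the handler could act on a
+		 * descriptor whose payload is not yet visible.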
*/ - if (!adapter->wait_capability) - done = true; + dma_rmb(); + ibmvnic_handle_crq(crq, adapter); + crq->generic.first = 0; } - /* if capabilities CRQ's were sent in this tasklet, the following - * tasklet must wait until all responses are received - */ - if (atomic_read(&adapter->running_cap_crqs) != 0) - adapter->wait_capability = true; + spin_unlock_irqrestore(&queue->lock, flags); } diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 4a8f36e0ab076620a704561aec3d3e882345cb65..4a7a56ff74ce16af8682808b9ae41c68e90a39b6 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -919,7 +919,6 @@ struct ibmvnic_adapter { int login_rsp_buf_sz; atomic_t running_cap_crqs; - bool wait_capability; struct ibmvnic_sub_crq_queue **tx_scrq ____cacheline_aligned; struct ibmvnic_sub_crq_queue **rx_scrq ____cacheline_aligned; diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index c3def0ee77884b57f05fcd4a6c8c21eed58be64c..8d06c9d8ff8b155c188de9fa4f393eb03054ae30 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -115,7 +115,8 @@ enum e1000_boards { board_pch_lpt, board_pch_spt, board_pch_cnp, - board_pch_tgp + board_pch_tgp, + board_pch_adp }; struct e1000_ps_page { @@ -502,6 +503,7 @@ extern const struct e1000_info e1000_pch_lpt_info; extern const struct e1000_info e1000_pch_spt_info; extern const struct e1000_info e1000_pch_cnp_info; extern const struct e1000_info e1000_pch_tgp_info; +extern const struct e1000_info e1000_pch_adp_info; extern const struct e1000_info e1000_es2_info; void e1000e_ptp_init(struct e1000_adapter *adapter); diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 5e4fc9b4e2adb00c467dc6c9f6000b5daa5f54cc..c908c84b86d222b5be4bbcc53a50106a92248fdd 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -6021,3 +6021,23 @@ const struct e1000_info e1000_pch_tgp_info = { .phy_ops = &ich8_phy_ops, .nvm_ops = &spt_nvm_ops, }; + +const struct e1000_info e1000_pch_adp_info = { + .mac = e1000_pch_adp, + .flags = FLAG_IS_ICH + | FLAG_HAS_WOL + | FLAG_HAS_HW_TIMESTAMP + | FLAG_HAS_CTRLEXT_ON_LOAD + | FLAG_HAS_AMT + | FLAG_HAS_FLASH + | FLAG_HAS_JUMBO_FRAMES + | FLAG_APME_IN_WUC, + .flags2 = FLAG2_HAS_PHY_STATS + | FLAG2_HAS_EEE, + .pba = 26, + .max_hw_frame_size = 9022, + .get_variants = e1000_get_variants_ich8lan, + .mac_ops = &ich8_mac_ops, + .phy_ops = &ich8_phy_ops, + .nvm_ops = &spt_nvm_ops, +}; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 635a95927e93005b1ee2091c7c8d5dfb9a3ee1fb..a42aeb555f343400721993e95f6a7d31397f3649 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -52,6 +52,7 @@ static const struct e1000_info *e1000_info_tbl[] = { [board_pch_spt] = &e1000_pch_spt_info, [board_pch_cnp] = &e1000_pch_cnp_info, [board_pch_tgp] = &e1000_pch_tgp_info, + [board_pch_adp] = &e1000_pch_adp_info, }; struct e1000_reg_info { @@ -6341,7 +6342,8 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter) u32 mac_data; u16 phy_data; - if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) { + if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID && + hw->mac.type >= e1000_pch_adp) { /* Request ME configure the device for S0ix */ mac_data = er32(H2ME); mac_data |= E1000_H2ME_START_DPG; @@ -6490,7 +6492,8 @@ static 
void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) u16 phy_data; u32 i = 0; - if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID) { + if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID && + hw->mac.type >= e1000_pch_adp) { /* Request ME unconfigure the device from S0ix */ mac_data = er32(H2ME); mac_data &= ~E1000_H2ME_START_DPG; @@ -7898,22 +7901,22 @@ static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_tgp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_tgp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V15), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM23), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V23), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM22), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V22), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_tgp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM23), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V23), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_LM22), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_RPL_I219_V22), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM20), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V20), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_LM21), board_pch_adp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_LNP_I219_V21), board_pch_adp }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 4d939af0a626c79705f74caca02f4df0f2c62cc2..80c5cecaf2b5648ce4cb13e10061b985bb180b47 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -144,6 +144,7 @@ enum i40e_state_t { __I40E_VIRTCHNL_OP_PENDING, __I40E_RECOVERY_MODE, __I40E_VF_RESETS_DISABLED, /* disable resets during i40e_remove */ + __I40E_IN_REMOVE, __I40E_VFS_RELEASING, /* This must be last as it determines the size of the BITMAP */ __I40E_STATE_SIZE__, @@ -174,7 +175,6 
@@ enum i40e_interrupt_policy { struct i40e_lump_tracking { u16 num_entries; - u16 search_hint; u16 list[0]; #define I40E_PILE_VALID_BIT 0x8000 #define I40E_IWARP_IRQ_PILE_ID (I40E_PILE_VALID_BIT - 2) @@ -848,12 +848,12 @@ struct i40e_vsi { struct rtnl_link_stats64 net_stats_offsets; struct i40e_eth_stats eth_stats; struct i40e_eth_stats eth_stats_offsets; - u32 tx_restart; - u32 tx_busy; + u64 tx_restart; + u64 tx_busy; u64 tx_linearize; u64 tx_force_wb; - u32 rx_buf_failed; - u32 rx_page_failed; + u64 rx_buf_failed; + u64 rx_page_failed; /* These are containers of ring pointers, allocated at run-time */ struct i40e_ring **rx_rings; diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 2c1b1da1220eca3bb0b5e8a8cf4fe54ce1a70ed2..1e57cc8c47d7bbd7ee15e632c3b6ee097f18e191 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -240,7 +240,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) (unsigned long int)vsi->net_stats_offsets.rx_compressed, (unsigned long int)vsi->net_stats_offsets.tx_compressed); dev_info(&pf->pdev->dev, - " tx_restart = %d, tx_busy = %d, rx_buf_failed = %d, rx_page_failed = %d\n", + " tx_restart = %llu, tx_busy = %llu, rx_buf_failed = %llu, rx_page_failed = %llu\n", vsi->tx_restart, vsi->tx_busy, vsi->rx_buf_failed, vsi->rx_page_failed); rcu_read_lock(); diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 2a3d8aef7f4e85a82c11e976804f03efe38741f1..0c4b7dfb3b35d583e48adb670a875834ebe9b113 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -196,10 +196,6 @@ int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem) * @id: an owner id to stick on the items assigned * * Returns the base item index of the lump, or negative for error - * - * The search_hint trick and lack of advanced fit-finding only work - * because we're highly likely to have all the same size lump requests. - * Linear search time and any fragmentation should be minimal. 
**/ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, u16 needed, u16 id) @@ -214,8 +210,21 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, return -EINVAL; } - /* start the linear search with an imperfect hint */ - i = pile->search_hint; + /* Allocate last queue in the pile for FDIR VSI queue + * so it doesn't fragment the qp_pile + */ + if (pile == pf->qp_pile && pf->vsi[id]->type == I40E_VSI_FDIR) { + if (pile->list[pile->num_entries - 1] & I40E_PILE_VALID_BIT) { + dev_err(&pf->pdev->dev, + "Cannot allocate queue %d for I40E_VSI_FDIR\n", + pile->num_entries - 1); + return -ENOMEM; + } + pile->list[pile->num_entries - 1] = id | I40E_PILE_VALID_BIT; + return pile->num_entries - 1; + } + + i = 0; while (i < pile->num_entries) { /* skip already allocated entries */ if (pile->list[i] & I40E_PILE_VALID_BIT) { @@ -234,7 +243,6 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, for (j = 0; j < needed; j++) pile->list[i+j] = id | I40E_PILE_VALID_BIT; ret = i; - pile->search_hint = i + j; break; } @@ -257,7 +265,7 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id) { int valid_id = (id | I40E_PILE_VALID_BIT); int count = 0; - int i; + u16 i; if (!pile || index >= pile->num_entries) return -EINVAL; @@ -269,8 +277,6 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id) count++; } - if (count && index < pile->search_hint) - pile->search_hint = index; return count; } @@ -772,9 +778,9 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) struct rtnl_link_stats64 *ns; /* netdev stats */ struct i40e_eth_stats *oes; struct i40e_eth_stats *es; /* device's eth stats */ - u32 tx_restart, tx_busy; + u64 tx_restart, tx_busy; struct i40e_ring *p; - u32 rx_page, rx_buf; + u64 rx_page, rx_buf; u64 bytes, packets; unsigned int start; u64 tx_linearize; @@ -5366,7 +5372,15 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, /* There is no need to reset BW when mqprio mode is on. */ if (pf->flags & I40E_FLAG_TC_MQPRIO) return 0; - if (!vsi->mqprio_qopt.qopt.hw && !(pf->flags & I40E_FLAG_DCB_ENABLED)) { + + if (!vsi->mqprio_qopt.qopt.hw) { + if (pf->flags & I40E_FLAG_DCB_ENABLED) + goto skip_reset; + + if (IS_ENABLED(CONFIG_I40E_DCB) && + i40e_dcb_hw_get_num_tc(&pf->hw) == 1) + goto skip_reset; + ret = i40e_set_bw_limit(vsi, vsi->seid, 0); if (ret) dev_info(&pf->pdev->dev, @@ -5374,6 +5388,8 @@ static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc, vsi->seid); return ret; } + +skip_reset: memset(&bw_data, 0, sizeof(bw_data)); bw_data.tc_valid_bits = enabled_tc; for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) @@ -10574,15 +10590,9 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } i40e_get_oem_version(&pf->hw); - if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && - ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) || - hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) { - /* The following delay is necessary for 4.33 firmware and older - * to recover after EMP reset. 200 ms should suffice but we - * put here 300 ms to be sure that FW is ready to operate - * after reset. - */ - mdelay(300); + if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) { + /* The following delay is necessary for firmware update. 
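+		 * The full second replaces the old 300 ms wait that applied
+		 * only to XL710 parts running firmware 4.33 or older, and it
+		 * now runs after every EMP reset.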
+		 */
+		mdelay(1000);
 	}

 	/* re-verify the eeprom if we just had an EMP reset */
@@ -10853,6 +10863,9 @@ static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
 				   bool lock_acquired)
 {
 	int ret;
+
+	if (test_bit(__I40E_IN_REMOVE, pf->state))
+		return;
 	/* Now we wait for GRST to settle out.
 	 * We don't have to delete the VEBs or VSIs from the hw switch
 	 * because the reset will make them disappear.
@@ -11792,7 +11805,6 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf)
 		return -ENOMEM;

 	pf->irq_pile->num_entries = vectors;
-	pf->irq_pile->search_hint = 0;

 	/* track first vector for misc interrupts, ignore return */
 	(void)i40e_get_lump(pf, pf->irq_pile, 1, I40E_PILE_VALID_BIT - 1);
@@ -12213,6 +12225,8 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)

 	vsi->req_queue_pairs = queue_count;
 	i40e_prep_for_reset(pf);
+	if (test_bit(__I40E_IN_REMOVE, pf->state))
+		return pf->alloc_rss_size;

 	pf->alloc_rss_size = new_rss_size;
@@ -12595,7 +12609,6 @@ static int i40e_sw_init(struct i40e_pf *pf)
 		goto sw_init_done;
 	}
 	pf->qp_pile->num_entries = pf->hw.func_caps.num_tx_qp;
-	pf->qp_pile->search_hint = 0;

 	pf->tx_timeout_recovery_level = 1;
@@ -13040,6 +13053,10 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog,
 	if (need_reset)
 		i40e_prep_for_reset(pf);

+	/* VSI shall be deleted in a moment, just return -EINVAL */
+	if (test_bit(__I40E_IN_REMOVE, pf->state))
+		return -EINVAL;
+
 	old_prog = xchg(&vsi->xdp_prog, prog);

 	if (need_reset) {
@@ -15930,8 +15947,13 @@ static void i40e_remove(struct pci_dev *pdev)
 	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), 0);
 	i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), 0);

-	while (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
+	/* Grab __I40E_RESET_RECOVERY_PENDING and set __I40E_IN_REMOVE
+	 * flags; once they are set, i40e_rebuild should not be called as
+	 * i40e_prep_for_reset always returns early.
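+	 * test_and_set_bit() below both waits out any reset already in
+	 * flight and claims __I40E_RESET_RECOVERY_PENDING for the remove
+	 * path, so no new recovery cycle can start once the loop exits.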
+	 */
+	while (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
 		usleep_range(1000, 2000);
+	set_bit(__I40E_IN_REMOVE, pf->state);

 	if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
 		set_bit(__I40E_VF_RESETS_DISABLED, pf->state);
@@ -16130,6 +16152,9 @@ static void i40e_pci_error_reset_done(struct pci_dev *pdev)
 {
 	struct i40e_pf *pf = pci_get_drvdata(pdev);

+	if (test_bit(__I40E_IN_REMOVE, pf->state))
+		return;
+
 	i40e_reset_and_rebuild(pf, false, false);
 }

diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index 8d0588a27a0531c14b002fd0fe6e03c2c01c5386..1908eed4fa5ee946761ec807e092837e6dcfab1e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -413,6 +413,9 @@
 #define I40E_VFINT_DYN_CTLN(_INTVF) (0x00024800 + ((_INTVF) * 4)) /* _i=0...511 */ /* Reset: VFR */
 #define I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT 1
 #define I40E_VFINT_DYN_CTLN_CLEARPBA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT)
+#define I40E_VFINT_ICR0_ADMINQ_SHIFT 30
+#define I40E_VFINT_ICR0_ADMINQ_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ADMINQ_SHIFT)
+#define I40E_VFINT_ICR0_ENA(_VF) (0x0002C000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
 #define I40E_VPINT_AEQCTL(_VF) (0x0002B800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */
 #define I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT 0
 #define I40E_VPINT_AEQCTL_ITR_INDX_SHIFT 11
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index b785d09c19f80ec7d0a76eceec743a159281c456..dfdb6e786461a5d5eaf153f56a98b5a37aa8c27c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1376,6 +1376,32 @@ static i40e_status i40e_config_vf_promiscuous_mode(struct i40e_vf *vf,
 	return aq_ret;
 }

+/**
+ * i40e_sync_vfr_reset
+ * @hw: pointer to hw struct
+ * @vf_id: VF identifier
+ *
+ * Before triggering a hardware reset, we need to make sure that no other
+ * process has reserved the hardware for any reset operations. This check is
+ * done by examining the status of the RSTAT1 register used to signal the
+ * reset.
+ **/
+static int i40e_sync_vfr_reset(struct i40e_hw *hw, int vf_id)
+{
+	u32 reg;
+	int i;
+
+	for (i = 0; i < I40E_VFR_WAIT_COUNT; i++) {
+		reg = rd32(hw, I40E_VFINT_ICR0_ENA(vf_id)) &
+		      I40E_VFINT_ICR0_ADMINQ_MASK;
+		if (reg)
+			return 0;
+
+		usleep_range(100, 200);
+	}
+
+	return -EAGAIN;
+}
+
 /**
  * i40e_trigger_vf_reset
  * @vf: pointer to the VF structure
@@ -1390,9 +1416,11 @@ static void i40e_trigger_vf_reset(struct i40e_vf *vf, bool flr)
 	struct i40e_pf *pf = vf->pf;
 	struct i40e_hw *hw = &pf->hw;
 	u32 reg, reg_idx, bit_idx;
+	bool vf_active;
+	u32 radq;

 	/* warn the VF */
-	clear_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states);
+	vf_active = test_and_clear_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states);

 	/* Disable VF's configuration API during reset. The flag is re-enabled
 	 * in i40e_alloc_vf_res(), when it's safe again to access VF's VSI.
@@ -1406,7 +1434,19 @@
 	 * just need to clean up, so don't hit the VFRTRIG register.
if (!flr) { - /* reset VF using VPGEN_VFRTRIG reg */ + /* Sync any pending VFR reset before triggering the next one */ + radq = rd32(hw, I40E_VFINT_ICR0_ENA(vf->vf_id)) & + I40E_VFINT_ICR0_ADMINQ_MASK; + if (vf_active && !radq) + /* wait for the VF driver to finish its reset */ + if (i40e_sync_vfr_reset(hw, vf->vf_id)) + dev_info(&pf->pdev->dev, + "Reset VF %d never finished\n", + vf->vf_id); + + /* Reset the VF using the VPGEN_VFRTRIG reg. This also sets + * the reset-in-progress state in the RSTAT1 register. + */ reg = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id)); reg |= I40E_VPGEN_VFRTRIG_VFSWR_MASK; wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), reg); @@ -2617,6 +2657,59 @@ error_param: aq_ret); } +/** + * i40e_check_enough_queue - check whether enough queues are available + * @vf: pointer to the VF info + * @needed: the number of items needed + * + * Returns the index of a large enough free region in the queue pile, + * or negative on error + **/ +static int i40e_check_enough_queue(struct i40e_vf *vf, u16 needed) +{ + unsigned int i, cur_queues, more, pool_size; + struct i40e_lump_tracking *pile; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi; + + vsi = pf->vsi[vf->lan_vsi_idx]; + cur_queues = vsi->alloc_queue_pairs; + + /* the currently allocated queues already satisfy the request */ + if (cur_queues >= needed) + return vsi->base_queue; + + pile = pf->qp_pile; + if (cur_queues > 0) { + /* some queues are already allocated, so just check + * whether enough free entries follow the allocated + * region. + */ + more = needed - cur_queues; + for (i = vsi->base_queue + cur_queues; + i < pile->num_entries; i++) { + if (pile->list[i] & I40E_PILE_VALID_BIT) + break; + + if (more-- == 1) + /* there is enough */ + return vsi->base_queue; + } + } + + pool_size = 0; + for (i = 0; i < pile->num_entries; i++) { + if (pile->list[i] & I40E_PILE_VALID_BIT) { + pool_size = 0; + continue; + } + if (needed <= ++pool_size) + /* there is enough */ + return i; + } + + return -ENOMEM; +} + /** * i40e_vc_request_queues_msg * @vf: pointer to the VF info @@ -2651,6 +2744,12 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg) req_pairs - cur_pairs, pf->queues_left); vfres->num_queue_pairs = pf->queues_left + cur_pairs; + } else if (i40e_check_enough_queue(vf, req_pairs) < 0) { + dev_warn(&pf->pdev->dev, + "VF %d requested %d more queues, but not enough are available.\n", + vf->vf_id, + req_pairs - cur_pairs); + vfres->num_queue_pairs = cur_pairs; } else { /* successful request */ vf->num_req_queues = req_pairs; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 49575a640a84c5e6c3abeaaae05427818b06fd87..03c42fd0fea19326916d302d67a7585456dc0a7c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -19,6 +19,7 @@ #define I40E_MAX_VF_PROMISC_FLAGS 3 #define I40E_VF_STATE_WAIT_COUNT 20 +#define I40E_VFR_WAIT_COUNT 100 /* Various queue ctrls */ enum i40e_queue_ctrl {
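
i40e_check_enough_queue() above answers "can this VF get `needed` queue pairs?" by scanning the qp_pile twice: first for free entries directly behind the VF's current block, then for any sufficiently long free run anywhere in the pile. The core scan, reduced to a plain array; note this sketch returns the base of the run, whereas the driver's second loop returns the index at which the run reaches the requested size, and DEMO_VALID_BIT is only a placeholder for I40E_PILE_VALID_BIT:

#include <linux/errno.h>
#include <linux/types.h>

#define DEMO_VALID_BIT	0x8000	/* placeholder for I40E_PILE_VALID_BIT */

static int demo_find_free_run(const u16 *list, unsigned int n, u16 needed)
{
	unsigned int i, run = 0;

	for (i = 0; i < n; i++) {
		if (list[i] & DEMO_VALID_BIT) {
			run = 0;		/* an in-use entry breaks the run */
			continue;
		}
		if (++run >= needed)
			return i - needed + 1;	/* base of the free region */
	}

	return -ENOMEM;
}

This check-before-commit lets i40e_vc_request_queues_msg() refuse a request up front instead of resetting the VF and failing the allocation later.
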
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 4e16d185077de32e6a09116850b29640995cdf78..a9fa701aaa951958f2432e2bb2ff0080b3c71306 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -483,6 +483,7 @@ enum ice_pf_flags { ICE_FLAG_VF_TRUE_PROMISC_ENA, ICE_FLAG_MDD_AUTO_RESET_VF, ICE_FLAG_LINK_LENIENT_MODE_ENA, + ICE_FLAG_PLUG_AUX_DEV, ICE_PF_FLAGS_NBITS /* must be last */ }; @@ -887,7 +888,7 @@ static inline void ice_set_rdma_cap(struct ice_pf *pf) if (pf->hw.func_caps.common_cap.rdma && pf->num_rdma_msix) { set_bit(ICE_FLAG_RDMA_ENA, pf->flags); set_bit(ICE_FLAG_AUX_ENA, pf->flags); - ice_plug_aux_dev(pf); + set_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags); } } diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 408d15a5b0e3a16e66710e7307c1c23edf18f3bb..a6d7d3eff186841766f53ed7998c2852ed32ec51 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -3342,7 +3342,8 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, !ice_fw_supports_report_dflt_cfg(hw)) { struct ice_link_default_override_tlv tlv; - if (ice_get_link_default_override(&tlv, pi)) + status = ice_get_link_default_override(&tlv, pi); + if (status) goto out; if (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE) && diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index e375ac849aecd6ef5dfd06c1ae821d12c635e2d1..4f954db01b92925637f58294ece8c4d10fa0681a 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -204,17 +204,39 @@ ice_lag_unlink(struct ice_lag *lag, lag->upper_netdev = NULL; } - if (lag->peer_netdev) { - dev_put(lag->peer_netdev); - lag->peer_netdev = NULL; - } - + lag->peer_netdev = NULL; ice_set_sriov_cap(pf); ice_set_rdma_cap(pf); lag->bonded = false; lag->role = ICE_LAG_NONE; } +/** + * ice_lag_unregister - handle netdev unregister events + * @lag: LAG info struct + * @netdev: netdev reporting the event + */ +static void ice_lag_unregister(struct ice_lag *lag, struct net_device *netdev) +{ + struct ice_pf *pf = lag->pf; + + /* make sure this event is for this netdev and + * that we are part of an aggregate + */ + if (netdev != lag->netdev || !lag->bonded) + return; + + if (lag->upper_netdev) { + dev_put(lag->upper_netdev); + lag->upper_netdev = NULL; + ice_set_sriov_cap(pf); + ice_set_rdma_cap(pf); + } + /* perform some cleanup in case we come back */ + lag->bonded = false; + lag->role = ICE_LAG_NONE; +} + /** * ice_lag_changeupper_event - handle LAG changeupper event * @lag: LAG info struct @@ -307,7 +329,7 @@ ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event, ice_lag_info_event(lag, ptr); break; case NETDEV_UNREGISTER: - ice_lag_unlink(lag, ptr); + ice_lag_unregister(lag, netdev); break; default: break; } diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index d981dc6f232355122945e887ff54947b25889e9d..85a612838a898165a53146c5f3600d7754563e21 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -568,6 +568,7 @@ struct ice_tx_ctx_desc { (0x3FFFFULL << ICE_TXD_CTX_QW1_TSO_LEN_S) #define ICE_TXD_CTX_QW1_MSS_S 50 +#define ICE_TXD_CTX_MIN_MSS 64 #define ICE_TXD_CTX_QW1_VSI_S 50 #define ICE_TXD_CTX_QW1_VSI_M (0x3FFULL << ICE_TXD_CTX_QW1_VSI_S) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 30814435f7795e88d6239dfccddf77194c67949a..17a9bb461dc3a33961915b4ddb86f231aefca2ec 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2253,6 +2253,9 @@ static void ice_service_task(struct work_struct *work) return; } + if (test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) + ice_plug_aux_dev(pf); + ice_clean_adminq_subtask(pf); ice_check_media_subtask(pf); ice_check_for_hang_subtask(pf);
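
The service-task hunk above completes the pattern started in ice_set_rdma_cap(): instead of plugging the auxiliary device inline, the capability path only sets ICE_FLAG_PLUG_AUX_DEV, and the periodic service task performs the plug from a safe, sleepable context. The hand-off in miniature, with hypothetical names standing in for the driver's flag and plug routine:

#include <linux/bitops.h>

#define DEMO_FLAG_PLUG	0	/* stands in for ICE_FLAG_PLUG_AUX_DEV */

static void demo_request_plug(unsigned long *flags)
{
	set_bit(DEMO_FLAG_PLUG, flags);		/* cheap; safe in any context */
}

static void demo_service_task(unsigned long *flags, void (*plug)(void))
{
	/* test_and_clear_bit() makes the hand-off one-shot: however many
	 * requests arrived, the device is plugged exactly once.
	 */
	if (test_and_clear_bit(DEMO_FLAG_PLUG, flags))
		plug();
}

@@ -8525,6 +8528,7 @@ ice_features_check(struct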
sk_buff *skb, struct net_device __always_unused *netdev, netdev_features_t features) { + bool gso = skb_is_gso(skb); size_t len; /* No point in doing any of this if neither checksum nor GSO are @@ -8537,24 +8541,32 @@ ice_features_check(struct sk_buff *skb, /* We cannot support GSO if the MSS is going to be less than * 64 bytes. If it is then we need to drop support for GSO. */ - if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64)) + if (gso && (skb_shinfo(skb)->gso_size < ICE_TXD_CTX_MIN_MSS)) features &= ~NETIF_F_GSO_MASK; - len = skb_network_header(skb) - skb->data; + len = skb_network_offset(skb); if (len > ICE_TXD_MACLEN_MAX || len & 0x1) goto out_rm_features; - len = skb_transport_header(skb) - skb_network_header(skb); + len = skb_network_header_len(skb); if (len > ICE_TXD_IPLEN_MAX || len & 0x1) goto out_rm_features; if (skb->encapsulation) { - len = skb_inner_network_header(skb) - skb_transport_header(skb); - if (len > ICE_TXD_L4LEN_MAX || len & 0x1) - goto out_rm_features; + /* this must work for VXLAN frames AND IPIP/SIT frames, and in + * the case of IPIP frames, the transport header pointer is + * after the inner header! So check to make sure that this + * is a GRE or UDP_TUNNEL frame before doing that math. + */ + if (gso && (skb_shinfo(skb)->gso_type & + (SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL))) { + len = skb_inner_network_header(skb) - + skb_transport_header(skb); + if (len > ICE_TXD_L4LEN_MAX || len & 0x1) + goto out_rm_features; + } - len = skb_inner_transport_header(skb) - - skb_inner_network_header(skb); + len = skb_inner_network_header_len(skb); if (len > ICE_TXD_IPLEN_MAX || len & 0x1) goto out_rm_features; } diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 0015fcf1df2b0257decff13d6583c14fdbd55238..0f293acd17e89933f274fe337c51e060b378b968 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1984,14 +1984,15 @@ static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter, if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX) return; - set_ring_build_skb_enabled(rx_ring); + if (PAGE_SIZE < 8192) + if (max_frame > IXGBEVF_MAX_FRAME_BUILD_SKB) + set_ring_uses_large_buffer(rx_ring); - if (PAGE_SIZE < 8192) { - if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB) - return; + /* 82599 can't rely on RXDCTL.RLPML to restrict the size of the frame */ + if (adapter->hw.mac.type == ixgbe_mac_82599_vf && !ring_uses_large_buffer(rx_ring)) + return; - set_ring_uses_large_buffer(rx_ring); - } + set_ring_build_skb_enabled(rx_ring); } /** diff --git a/drivers/net/ethernet/litex/Kconfig b/drivers/net/ethernet/litex/Kconfig index f99adbf26ab4e355914d774e873a7b893f6b530c..04345b929d8e5805cb7ab39660cfa908695ff5db 100644 --- a/drivers/net/ethernet/litex/Kconfig +++ b/drivers/net/ethernet/litex/Kconfig @@ -17,7 +17,7 @@ if NET_VENDOR_LITEX config LITEX_LITEETH tristate "LiteX Ethernet support" - depends on OF + depends on OF && HAS_IOMEM help If you wish to compile a kernel for hardware with a LiteX LiteEth device then you should answer Y to this. 
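
The ice_features_check() rewrite above swaps open-coded pointer arithmetic for the skb helpers and only trusts the inner-header math for GRE/UDP tunnels, since for IPIP/SIT frames the transport header already points past the inner header. A condensed model of the outer-header checks, assuming the same descriptor limits; the DEMO_* constants are placeholders, not the driver's values:

#include <linux/skbuff.h>
#include <linux/netdevice.h>

#define DEMO_MACLEN_MAX	127	/* placeholder for ICE_TXD_MACLEN_MAX */
#define DEMO_IPLEN_MAX	127	/* placeholder for ICE_TXD_IPLEN_MAX */
#define DEMO_MIN_MSS	64	/* mirrors ICE_TXD_CTX_MIN_MSS */

static netdev_features_t demo_features_check(struct sk_buff *skb,
					     netdev_features_t features)
{
	size_t len;

	/* MSS below the hardware floor: drop GSO for this frame */
	if (skb_is_gso(skb) && skb_shinfo(skb)->gso_size < DEMO_MIN_MSS)
		features &= ~NETIF_F_GSO_MASK;

	/* L2 header length and even alignment */
	len = skb_network_offset(skb);
	if (len > DEMO_MACLEN_MAX || len & 0x1)
		goto out_rm;

	/* L3 header length and even alignment */
	len = skb_network_header_len(skb);
	if (len > DEMO_IPLEN_MAX || len & 0x1)
		goto out_rm;

	return features;
out_rm:
	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
}

Using skb_network_offset()/skb_network_header_len() instead of raw pointer subtraction expresses the same lengths while staying correct for every header layout the stack can produce.
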
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 186d00a9ab35c3a12ff6d0137980980ea2011b8a..3631d612aaca1d36e5496d12d3f89700fa2b811a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -1570,6 +1570,8 @@ static struct mac_ops cgx_mac_ops = { .mac_enadis_pause_frm = cgx_lmac_enadis_pause_frm, .mac_pause_frm_config = cgx_lmac_pause_frm_config, .mac_enadis_ptp_config = cgx_lmac_ptp_config, + .mac_rx_tx_enable = cgx_lmac_rx_tx_enable, + .mac_tx_enable = cgx_lmac_tx_enable, }; static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h index fc6e7423cbd81d22623e8ff2c7e94f84b0dd0e87..b33e7d1d0851c7f08bd16d9e3098b083db8e512c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -107,6 +107,9 @@ struct mac_ops { void (*mac_enadis_ptp_config)(void *cgxd, int lmac_id, bool enable); + + int (*mac_rx_tx_enable)(void *cgxd, int lmac_id, bool enable); + int (*mac_tx_enable)(void *cgxd, int lmac_id, bool enable); }; struct cgx { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 4e79e918a161772f4826a8cd33185dc2b4c81250..58e2aeebc14f8db75ad877e8261ece1dfdc20fc7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -732,6 +732,7 @@ enum nix_af_status { NIX_AF_ERR_BANDPROF_INVAL_REQ = -428, NIX_AF_ERR_CQ_CTX_WRITE_ERR = -429, NIX_AF_ERR_AQ_CTX_RETRY_WRITE = -430, + NIX_AF_ERR_LINK_CREDITS = -431, }; /* For NIX RX vtag action */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h index 0fe7ad35e36fd148393e898d83363d88703b5043..4180376fa676366f037573fe03ca46519667b039 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h @@ -185,7 +185,6 @@ enum npc_kpu_parser_state { NPC_S_KPU2_QINQ, NPC_S_KPU2_ETAG, NPC_S_KPU2_EXDSA, - NPC_S_KPU2_NGIO, NPC_S_KPU2_CPT_CTAG, NPC_S_KPU2_CPT_QINQ, NPC_S_KPU3_CTAG, @@ -212,6 +211,7 @@ enum npc_kpu_parser_state { NPC_S_KPU5_NSH, NPC_S_KPU5_CPT_IP, NPC_S_KPU5_CPT_IP6, + NPC_S_KPU5_NGIO, NPC_S_KPU6_IP6_EXT, NPC_S_KPU6_IP6_HOP_DEST, NPC_S_KPU6_IP6_ROUT, @@ -1120,15 +1120,6 @@ static struct npc_kpu_profile_cam kpu1_cam_entries[] = { 0x0000, 0x0000, }, - { - NPC_S_KPU1_ETHER, 0xff, - NPC_ETYPE_CTAG, - 0xffff, - NPC_ETYPE_NGIO, - 0xffff, - 0x0000, - 0x0000, - }, { NPC_S_KPU1_ETHER, 0xff, NPC_ETYPE_CTAG, @@ -1966,6 +1957,15 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = { 0x0000, 0x0000, }, + { + NPC_S_KPU2_CTAG, 0xff, + NPC_ETYPE_NGIO, + 0xffff, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + }, { NPC_S_KPU2_CTAG, 0xff, NPC_ETYPE_PPPOE, @@ -2749,15 +2749,6 @@ static struct npc_kpu_profile_cam kpu2_cam_entries[] = { 0x0000, 0x0000, }, - { - NPC_S_KPU2_NGIO, 0xff, - 0x0000, - 0x0000, - 0x0000, - 0x0000, - 0x0000, - 0x0000, - }, { NPC_S_KPU2_CPT_CTAG, 0xff, NPC_ETYPE_IP, @@ -5089,6 +5080,15 @@ static struct npc_kpu_profile_cam kpu5_cam_entries[] = { 0x0000, 0x0000, }, + { + NPC_S_KPU5_NGIO, 0xff, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + }, { NPC_S_NA, 0X00, 0x0000, @@ -8422,14 +8422,6 @@ static struct npc_kpu_profile_action 
kpu1_action_entries[] = { 0, 0, 0, 0, 0, }, - { - NPC_ERRLEV_RE, NPC_EC_NOERR, - 8, 12, 0, 0, 0, - NPC_S_KPU2_NGIO, 12, 1, - NPC_LID_LA, NPC_LT_LA_ETHER, - 0, - 0, 0, 0, 0, - }, { NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 12, 0, 0, 0, @@ -9194,6 +9186,14 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = { 0, 0, 0, 0, 0, }, + { + NPC_ERRLEV_RE, NPC_EC_NOERR, + 0, 0, 0, 2, 0, + NPC_S_KPU5_NGIO, 6, 1, + NPC_LID_LB, NPC_LT_LB_CTAG, + 0, + 0, 0, 0, 0, + }, { NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 6, 2, 0, @@ -9890,14 +9890,6 @@ static struct npc_kpu_profile_action kpu2_action_entries[] = { NPC_F_LB_U_UNK_ETYPE | NPC_F_LB_L_EXDSA, 0, 0, 0, 0, }, - { - NPC_ERRLEV_RE, NPC_EC_NOERR, - 0, 0, 0, 0, 1, - NPC_S_NA, 0, 1, - NPC_LID_LC, NPC_LT_LC_NGIO, - 0, - 0, 0, 0, 0, - }, { NPC_ERRLEV_RE, NPC_EC_NOERR, 8, 0, 6, 2, 0, @@ -11973,6 +11965,14 @@ static struct npc_kpu_profile_action kpu5_action_entries[] = { 0, 0, 0, 0, 0, }, + { + NPC_ERRLEV_RE, NPC_EC_NOERR, + 0, 0, 0, 0, 1, + NPC_S_NA, 0, 1, + NPC_LID_LC, NPC_LT_LC_NGIO, + 0, + 0, 0, 0, 0, + }, { NPC_ERRLEV_LC, NPC_EC_UNK, 0, 0, 0, 0, 1, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index e695fa0e82a94c2e79679f86768319cc90123e69..9ea2f6ac38ec18fc1269bd59f0b787a05746b4c3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -30,6 +30,8 @@ static struct mac_ops rpm_mac_ops = { .mac_enadis_pause_frm = rpm_lmac_enadis_pause_frm, .mac_pause_frm_config = rpm_lmac_pause_frm_config, .mac_enadis_ptp_config = rpm_lmac_ptp_config, + .mac_rx_tx_enable = rpm_lmac_rx_tx_enable, + .mac_tx_enable = rpm_lmac_tx_enable, }; struct mac_ops *rpm_get_mac_ops(void) @@ -54,6 +56,43 @@ int rpm_get_nr_lmacs(void *rpmd) return hweight8(rpm_read(rpm, 0, CGXX_CMRX_RX_LMACS) & 0xFULL); } +int rpm_lmac_tx_enable(void *rpmd, int lmac_id, bool enable) +{ + rpm_t *rpm = rpmd; + u64 cfg, last; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + last = cfg; + if (enable) + cfg |= RPM_TX_EN; + else + cfg &= ~(RPM_TX_EN); + + if (cfg != last) + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + return !!(last & RPM_TX_EN); +} + +int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable) +{ + rpm_t *rpm = rpmd; + u64 cfg; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG); + if (enable) + cfg |= RPM_RX_EN | RPM_TX_EN; + else + cfg &= ~(RPM_RX_EN | RPM_TX_EN); + rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + return 0; +} + void rpm_lmac_enadis_rx_pause_fwding(void *rpmd, int lmac_id, bool enable) { rpm_t *rpm = rpmd; @@ -252,23 +291,20 @@ int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable) if (!rpm || lmac_id >= rpm->lmac_count) return -ENODEV; lmac_type = rpm->mac_ops->get_lmac_type(rpm, lmac_id); - if (lmac_type == LMAC_MODE_100G_R) { - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1); - - if (enable) - cfg |= RPMX_MTI_PCS_LBK; - else - cfg &= ~RPMX_MTI_PCS_LBK; - rpm_write(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1, cfg); - } else { - cfg = rpm_read(rpm, lmac_id, RPMX_MTI_LPCSX_CONTROL1); - if (enable) - cfg |= RPMX_MTI_PCS_LBK; - else - cfg &= ~RPMX_MTI_PCS_LBK; - rpm_write(rpm, lmac_id, RPMX_MTI_LPCSX_CONTROL1, cfg); + + if (lmac_type == LMAC_MODE_QSGMII || lmac_type == LMAC_MODE_SGMII) { + dev_err(&rpm->pdev->dev, "loopback not supported for LPC mode\n"); + 
return 0; } + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1); + + if (enable) + cfg |= RPMX_MTI_PCS_LBK; + else + cfg &= ~RPMX_MTI_PCS_LBK; + rpm_write(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1, cfg); + return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index 57c8a687b488a93f24835e35c6e46ad531093e9f..ff580311edd03ed7b93308ca49bf5bd3abd7a431 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -43,6 +43,8 @@ #define RPMX_MTI_STAT_DATA_HI_CDC 0x10038 #define RPM_LMAC_FWI 0xa +#define RPM_TX_EN BIT_ULL(0) +#define RPM_RX_EN BIT_ULL(1) /* Function Declarations */ int rpm_get_nr_lmacs(void *rpmd); @@ -57,4 +59,6 @@ int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause, int rpm_get_tx_stats(void *rpmd, int lmac_id, int idx, u64 *tx_stat); int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat); void rpm_lmac_ptp_config(void *rpmd, int lmac_id, bool enable); +int rpm_lmac_rx_tx_enable(void *rpmd, int lmac_id, bool enable); +int rpm_lmac_tx_enable(void *rpmd, int lmac_id, bool enable); #endif /* RPM_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index 3ca6b942ebe2539ab9fc2d10f8cbe48934157588..54e1b27a7dfecb77ec6b43aa48d5a13994e671c3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -520,8 +520,11 @@ static void rvu_block_reset(struct rvu *rvu, int blkaddr, u64 rst_reg) rvu_write64(rvu, blkaddr, rst_reg, BIT_ULL(0)); err = rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true); - if (err) - dev_err(rvu->dev, "HW block:%d reset failed\n", blkaddr); + if (err) { + dev_err(rvu->dev, "HW block:%d reset timeout retrying again\n", blkaddr); + while (rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true) == -EBUSY) + ; + } } static void rvu_reset_all_blocks(struct rvu *rvu) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 66e45d733824ef6bb995e0725d6385cd49cc220e..5ed94cfb47d2d13c43389c167d165d95cfa7ae75 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -806,6 +806,7 @@ bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature); u32 rvu_cgx_get_fifolen(struct rvu *rvu); void *rvu_first_cgx_pdata(struct rvu *rvu); int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id); +int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable); int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, int type); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 2ca182a4ce8236a37f64da7f9fdbe2e6ea2ad3fc..8a7ac5a8b821db81e021c9d2d72c556a38244bc4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -441,16 +441,26 @@ void rvu_cgx_enadis_rx_bp(struct rvu *rvu, int pf, bool enable) int rvu_cgx_config_rxtx(struct rvu *rvu, u16 pcifunc, bool start) { int pf = rvu_get_pf(pcifunc); + struct mac_ops *mac_ops; u8 cgx_id, lmac_id; + void *cgxd; if (!is_cgx_config_permitted(rvu, pcifunc)) return LMAC_AF_ERR_PERM_DENIED; rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); + cgxd = rvu_cgx_pdata(cgx_id, rvu); + mac_ops = get_mac_ops(cgxd); + + return mac_ops->mac_rx_tx_enable(cgxd, lmac_id, start); +} - 
cgx_lmac_rx_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, start); +int rvu_cgx_config_tx(void *cgxd, int lmac_id, bool enable) +{ + struct mac_ops *mac_ops; - return 0; + mac_ops = get_mac_ops(cgxd); + return mac_ops->mac_tx_enable(cgxd, lmac_id, enable); } void rvu_cgx_disable_dmac_entries(struct rvu *rvu, u16 pcifunc) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index a09a507369ac3338edff46555fcff8242d7d1c0b..d1eddb769a419c801fdb7e4055ad095355dba2f6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -1224,6 +1224,8 @@ static void print_nix_cn10k_sq_ctx(struct seq_file *m, seq_printf(m, "W3: head_offset\t\t\t%d\nW3: smenq_next_sqb_vld\t\t%d\n\n", sq_ctx->head_offset, sq_ctx->smenq_next_sqb_vld); + seq_printf(m, "W3: smq_next_sq_vld\t\t%d\nW3: smq_pend\t\t\t%d\n", + sq_ctx->smq_next_sq_vld, sq_ctx->smq_pend); seq_printf(m, "W4: next_sqb \t\t\t%llx\n\n", sq_ctx->next_sqb); seq_printf(m, "W5: tail_sqb \t\t\t%llx\n\n", sq_ctx->tail_sqb); seq_printf(m, "W6: smenq_sqb \t\t\t%llx\n\n", sq_ctx->smenq_sqb); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index d8b1948aaa0aef077598f633e00c3ff926a5ebab..97fb61915379a1b3befdb6a0a57bbcf7ba10a947 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -512,11 +512,11 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req, cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST); lmac_chan_cnt = cfg & 0xFF; - cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1); - sdp_chan_cnt = cfg & 0xFFF; - cgx_bpid_cnt = hw->cgx_links * lmac_chan_cnt; lbk_bpid_cnt = hw->lbk_links * ((cfg >> 16) & 0xFF); + + cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1); + sdp_chan_cnt = cfg & 0xFFF; sdp_bpid_cnt = hw->sdp_links * sdp_chan_cnt; pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc); @@ -2068,8 +2068,8 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr, /* enable cgx tx if disabled */ if (is_pf_cgxmapped(rvu, pf)) { rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); - restore_tx_en = !cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), - lmac_id, true); + restore_tx_en = !rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), + lmac_id, true); } cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq)); @@ -2092,7 +2092,7 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr, rvu_cgx_enadis_rx_bp(rvu, pf, true); /* restore cgx tx state */ if (restore_tx_en) - cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false); + rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false); return err; } @@ -3878,7 +3878,7 @@ nix_config_link_credits(struct rvu *rvu, int blkaddr, int link, /* Enable cgx tx if disabled for credits to be back */ if (is_pf_cgxmapped(rvu, pf)) { rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); - restore_tx_en = !cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), + restore_tx_en = !rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), lmac_id, true); } @@ -3891,8 +3891,8 @@ nix_config_link_credits(struct rvu *rvu, int blkaddr, int link, NIX_AF_TL1X_SW_XOFF(schq), BIT_ULL(0)); } - rc = -EBUSY; - poll_tmo = jiffies + usecs_to_jiffies(10000); + rc = NIX_AF_ERR_LINK_CREDITS; + poll_tmo = jiffies + usecs_to_jiffies(200000); /* Wait for credits to return */ do { if (time_after(jiffies, poll_tmo)) @@ -3918,7 +3918,7 @@ exit: /* Restore state of cgx tx */ 
if (restore_tx_en) - cgx_lmac_tx_enable(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false); + rvu_cgx_config_tx(rvu_cgx_pdata(cgx_id, rvu), lmac_id, false); mutex_unlock(&rvu->rsrc_lock); return rc; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index c0005a1feee69ff178a1495b4594670da9aaa512..91f86d77cd41b7a2703992523c468cbeec2496b5 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -402,6 +402,7 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, int index, struct mcam_entry *entry, bool *enable) { + struct rvu_npc_mcam_rule *rule; u16 owner, target_func; struct rvu_pfvf *pfvf; u64 rx_action; @@ -423,6 +424,12 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam, test_bit(NIXLF_INITIALIZED, &pfvf->flags))) *enable = false; + /* fix up not needed for the rules added by user(ntuple filters) */ + list_for_each_entry(rule, &mcam->mcam_rules, list) { + if (rule->entry == index) + return; + } + /* copy VF default entry action to the VF mcam entry */ rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr, target_func); @@ -489,8 +496,8 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam, } /* PF installing VF rule */ - if (intf == NIX_INTF_RX && actindex < mcam->bmap_entries) - npc_fixup_vf_rule(rvu, mcam, blkaddr, index, entry, &enable); + if (is_npc_intf_rx(intf) && actindex < mcam->bmap_entries) + npc_fixup_vf_rule(rvu, mcam, blkaddr, actindex, entry, &enable); /* Set 'action' */ rvu_write64(rvu, blkaddr, @@ -916,7 +923,8 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, u16 pcifunc, u64 rx_action) { int actindex, index, bank, entry; - bool enable; + struct rvu_npc_mcam_rule *rule; + bool enable, update; if (!(pcifunc & RVU_PFVF_FUNC_MASK)) return; @@ -924,6 +932,14 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam, mutex_lock(&mcam->lock); for (index = 0; index < mcam->bmap_entries; index++) { if (mcam->entry2target_pffunc[index] == pcifunc) { + update = true; + /* update not needed for the rules added via ntuple filters */ + list_for_each_entry(rule, &mcam->mcam_rules, list) { + if (rule->entry == index) + update = false; + } + if (!update) + continue; bank = npc_get_bank(mcam, index); actindex = index; entry = index & (mcam->banksize - 1); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c index ff2b21999f36f76a8715cef9a0a53149c6ab95b0..19c53e591d0daf2cbdd54ec4e6a0c7c83fa9394b 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c @@ -1098,14 +1098,6 @@ find_rule: write_req.cntr = rule->cntr; } - err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req, - &write_rsp); - if (err) { - rvu_mcam_remove_counter_from_rule(rvu, owner, rule); - if (new) - kfree(rule); - return err; - } /* update rule */ memcpy(&rule->packet, &dummy.packet, sizeof(rule->packet)); memcpy(&rule->mask, &dummy.mask, sizeof(rule->mask)); @@ -1132,6 +1124,18 @@ find_rule: if (req->default_rule) pfvf->def_ucast_rule = rule; + /* write to mcam entry registers */ + err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req, + &write_rsp); + if (err) { + rvu_mcam_remove_counter_from_rule(rvu, owner, rule); + if (new) { + list_del(&rule->list); + kfree(rule); + } + return err; + } + /* 
VF's MAC address is being changed via PF */ if (pf_set_vfs_mac) { ether_addr_copy(pfvf->default_mac, req->packet.dmac); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 61e52812983fa3fc6a21d55f075297cc5ca2977a..14509fc64cce9d894ba450624302f7b7a5376a21 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -603,6 +603,7 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura, size++; tar_addr |= ((size - 1) & 0x7) << 4; } + dma_wmb(); memcpy((u64 *)lmt_info->lmt_addr, ptrs, sizeof(u64) * num_ptrs); /* Perform LMTST flush */ cn10k_lmt_flush(val, tar_addr); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 6080ebd9bd947eb31645f63b244cadd7f4e26a54..d39341e4ab378db33a774d2eb843f3daa417b8c4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -394,7 +394,12 @@ static int otx2_forward_vf_mbox_msgs(struct otx2_nic *pf, dst_mdev->msg_size = mbox_hdr->msg_size; dst_mdev->num_msgs = num_msgs; err = otx2_sync_mbox_msg(dst_mbox); - if (err) { + /* Error code -EIO indicate there is a communication failure + * to the AF. Rest of the error codes indicate that AF processed + * VF messages and set the error codes in response messages + * (if any) so simply forward responses to VF. + */ + if (err == -EIO) { dev_warn(pf->dev, "AF not responding to VF%d messages\n", vf); /* restore PF mbase and exit */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 812e6810cb3bc7e7802a3944a73a35bddb2a20e7..c14e06ca64d8d7f3dbc25c4839af6b5879f3c8a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -224,7 +224,7 @@ static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; struct mlx5_wqe_eth_seg eth; - struct mlx5_wqe_data_seg data[0]; + struct mlx5_wqe_data_seg data[]; }; struct mlx5e_rx_wqe_ll { @@ -241,8 +241,8 @@ struct mlx5e_umr_wqe { struct mlx5_wqe_umr_ctrl_seg uctrl; struct mlx5_mkey_seg mkc; union { - struct mlx5_mtt inline_mtts[0]; - struct mlx5_klm inline_klms[0]; + DECLARE_FLEX_ARRAY(struct mlx5_mtt, inline_mtts); + DECLARE_FLEX_ARRAY(struct mlx5_klm, inline_klms); }; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c index 00449df98a5efb89f48c7e5b5d03a4a71ccb80b4..c1e07496c89c717ec114f6a99262a5bd43b1782b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/qos.c @@ -570,7 +570,8 @@ static int mlx5e_htb_convert_rate(struct mlx5e_priv *priv, u64 rate, static void mlx5e_htb_convert_ceil(struct mlx5e_priv *priv, u64 ceil, u32 *max_average_bw) { - *max_average_bw = div_u64(ceil, BYTES_IN_MBIT); + /* Hardware treats 0 as "unlimited", set at least 1. 
*/ + *max_average_bw = max_t(u32, div_u64(ceil, BYTES_IN_MBIT), 1); qos_dbg(priv->mdev, "Convert: ceil %llu -> max_average_bw %u\n", ceil, *max_average_bw); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c index 9c076aa20306a7d2bd116b85dfac000bcafaa3f0..b6f5c1bcdbcd493f4ec4ea9bae6eb904454ccce5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -183,18 +183,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev) { - struct mlx5e_rep_priv *rpriv; - struct mlx5e_priv *priv; - - /* A given netdev is not a representor or not a slave of LAG configuration */ - if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev)) - return false; - - priv = netdev_priv(netdev); - rpriv = priv->ppriv; - - /* Egress acl forward to vport is supported only non-uplink representor */ - return rpriv->rep->vport != MLX5_VPORT_UPLINK; + return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev); } static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr) @@ -210,9 +199,6 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt u16 fwd_vport_num; int err; - if (!mlx5e_rep_is_lag_netdev(netdev)) - return; - info = ptr; lag_info = info->lower_state_info; /* This is not an event of a representor becoming active slave */ @@ -266,9 +252,6 @@ static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr) struct net_device *lag_dev; struct mlx5e_priv *priv; - if (!mlx5e_rep_is_lag_netdev(netdev)) - return; - priv = netdev_priv(netdev); rpriv = priv->ppriv; lag_dev = info->upper_dev; @@ -293,6 +276,19 @@ static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_bond *bond; + struct mlx5e_priv *priv; + + if (!mlx5e_rep_is_lag_netdev(netdev)) + return NOTIFY_DONE; + + bond = container_of(nb, struct mlx5e_rep_bond, nb); + priv = netdev_priv(netdev); + rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH); + /* Verify VF representor is on the same device of the bond handling the netevent. 
*/ + if (rpriv->uplink_priv.bond != bond) + return NOTIFY_DONE; switch (event) { case NETDEV_CHANGELOWERSTATE: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index c6d2f8c78db71ab2342cbb368de174831724e102..48dc121b2cb4cf9a6780a7db230be7cdb2e5f19a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -491,7 +491,7 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) } br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event; - err = register_netdevice_notifier(&br_offloads->netdev_nb); + err = register_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb); if (err) { esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n", err); @@ -509,7 +509,9 @@ err_register_swdev_blk: err_register_swdev: destroy_workqueue(br_offloads->wq); err_alloc_wq: + rtnl_lock(); mlx5_esw_bridge_cleanup(esw); + rtnl_unlock(); } void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) @@ -524,7 +526,7 @@ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) return; cancel_delayed_work_sync(&br_offloads->update_work); - unregister_netdevice_notifier(&br_offloads->netdev_nb); + unregister_netdevice_notifier_net(&init_net, &br_offloads->netdev_nb); unregister_switchdev_blocking_notifier(&br_offloads->nb_blk); unregister_switchdev_notifier(&br_offloads->nb); destroy_workqueue(br_offloads->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 4cdf8e5b24c229f421ccce0c966772f7e4fa11e5..b789af07829c03e9ac9e758c8024ae7ac7eb802b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -167,6 +167,11 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size) return pi; } +static inline u16 mlx5e_shampo_get_cqe_header_index(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + return be16_to_cpu(cqe->shampo.header_entry_index) & (rq->mpwqe.shampo->hd_per_wq - 1); +} + struct mlx5e_shampo_umr { u16 len; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 338d65e2c9ce8d88f39842c01b26d0760452c31e..56e10c84a70684f415a5c99f654f0410f050524b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -341,8 +341,10 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd, /* copy the inline part if required */ if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) { - memcpy(eseg->inline_hdr.start, xdptxd->data, MLX5E_XDP_MIN_INLINE); + memcpy(eseg->inline_hdr.start, xdptxd->data, sizeof(eseg->inline_hdr.start)); eseg->inline_hdr.sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); + memcpy(dseg, xdptxd->data + sizeof(eseg->inline_hdr.start), + MLX5E_XDP_MIN_INLINE - sizeof(eseg->inline_hdr.start)); dma_len -= MLX5E_XDP_MIN_INLINE; dma_addr += MLX5E_XDP_MIN_INLINE; dseg++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c index 2db9573a3fe69d9f175c663334cc6c6c14ad5ec0..b56fea142c24679346fa2820dfb6a000442315e9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c @@ -157,11 +157,20 @@ static void mlx5e_ipsec_set_swp(struct sk_buff *skb, /* Tunnel mode */ if (mode == 
XFRM_MODE_TUNNEL) { eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; - eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; if (xo->proto == IPPROTO_IPV6) eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; - if (inner_ip_hdr(skb)->protocol == IPPROTO_UDP) + + switch (xo->inner_ipproto) { + case IPPROTO_UDP: eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + fallthrough; + case IPPROTO_TCP: + /* IP | ESP | IP | [TCP | UDP] */ + eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + default: + break; + } return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index b98db50c3418d9daad27e5fcaa756c8126557265..428881e0adcbeae1d5d70a84c27427986958972e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -131,14 +131,17 @@ static inline bool mlx5e_ipsec_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) { - struct xfrm_offload *xo = xfrm_offload(skb); + u8 inner_ipproto; if (!mlx5e_ipsec_eseg_meta(eseg)) return false; eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; - if (xo->inner_ipproto) { - eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM | MLX5_ETH_WQE_L3_INNER_CSUM; + inner_ipproto = xfrm_offload(skb)->inner_ipproto; + if (inner_ipproto) { + eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM; + if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP) + eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; sq->stats->csum_partial_inner++; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index e86ccc22fb827164a63df4cbb8d9c42f72b83034..ee0a8f5206e3a00df96c3edb5429b00dee36f28f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1117,7 +1117,7 @@ static void mlx5e_shampo_update_ipv6_udp_hdr(struct mlx5e_rq *rq, struct ipv6hdr static void mlx5e_shampo_update_fin_psh_flags(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct tcphdr *skb_tcp_hd) { - u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index); + u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe); struct tcphdr *last_tcp_hd; void *last_hd_addr; @@ -1871,7 +1871,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, return skb; } -static void +static struct sk_buff * mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, struct mlx5_cqe64 *cqe, u16 header_index) { @@ -1895,7 +1895,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, skb = mlx5e_build_linear_skb(rq, hdr, frag_size, rx_headroom, head_size); if (unlikely(!skb)) - return; + return NULL; /* queue up for recycling/reuse */ page_ref_inc(head->page); @@ -1907,7 +1907,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, ALIGN(head_size, sizeof(long))); if (unlikely(!skb)) { rq->stats->buff_alloc_err++; - return; + return NULL; } prefetchw(skb->data); @@ -1918,9 +1918,7 @@ mlx5e_skb_from_cqe_shampo(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, skb->tail += head_size; skb->len += head_size; } - rq->hw_gro_data->skb = skb; - NAPI_GRO_CB(skb)->count = 1; - skb_shinfo(skb)->gso_size = mpwrq_get_cqe_byte_cnt(cqe) - head_size; + return skb; } static void @@ -1973,13 +1971,14 @@ 
mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index) static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { u16 data_bcnt = mpwrq_get_cqe_byte_cnt(cqe) - cqe->shampo.header_size; - u16 header_index = be16_to_cpu(cqe->shampo.header_entry_index); + u16 header_index = mlx5e_shampo_get_cqe_header_index(rq, cqe); u32 wqe_offset = be32_to_cpu(cqe->shampo.data_offset); u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); u32 data_offset = wqe_offset & (PAGE_SIZE - 1); u32 cqe_bcnt = mpwrq_get_cqe_byte_cnt(cqe); u16 wqe_id = be16_to_cpu(cqe->wqe_id); u32 page_idx = wqe_offset >> PAGE_SHIFT; + u16 head_size = cqe->shampo.header_size; struct sk_buff **skb = &rq->hw_gro_data->skb; bool flush = cqe->shampo.flush; bool match = cqe->shampo.match; @@ -2011,9 +2010,16 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq } if (!*skb) { - mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index); + if (likely(head_size)) + *skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index); + else + *skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe_bcnt, data_offset, + page_idx); if (unlikely(!*skb)) goto free_hd_entry; + + NAPI_GRO_CB(*skb)->count = 1; + skb_shinfo(*skb)->gso_size = cqe_bcnt - head_size; } else { NAPI_GRO_CB(*skb)->count++; if (NAPI_GRO_CB(*skb)->count == 2 && @@ -2027,8 +2033,10 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq } } - di = &wi->umr.dma_info[page_idx]; - mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset); + if (likely(head_size)) { + di = &wi->umr.dma_info[page_idx]; + mlx5e_fill_skb_data(*skb, rq, di, data_bcnt, data_offset); + } mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb); if (flush) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 3d908a7e140690acacf135385d378f965fb2e412..2022fa4a959850e929329b9c6c4255373339bc2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1414,7 +1414,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_out; - if (!attr->chain && esw_attr->int_port) { + if (!attr->chain && esw_attr->int_port && + attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { /* If decap route device is internal port, change the * source vport value in reg_c0 back to uplink just in * case the rule performs goto chain > 0. 
If we have a miss @@ -3191,6 +3192,18 @@ actions_match_supported(struct mlx5e_priv *priv, return false; } + if (!(~actions & + (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { + NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action"); + return false; + } + + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); + return false; + } + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions, ct_flow, ct_clear, extack)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 7fd33b356cc8d191413e8259acd0b26b3ebd6ba9..ee7ecb88adc155217ad64ed33386c6cf7fd8a5f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -208,7 +208,7 @@ static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs) int cpy1_sz = 2 * ETH_ALEN; int cpy2_sz = ihs - cpy1_sz; - memcpy(vhdr, skb->data, cpy1_sz); + memcpy(&vhdr->addrs, skb->data, cpy1_sz); vhdr->h_vlan_proto = skb->vlan_proto; vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb)); memcpy(&vhdr->h_vlan_encapsulated_proto, skb->data + cpy1_sz, cpy2_sz); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index f690f430f40f8047781c84a10b71ec681a210d6a..05e08cec5a8cf077dbafa8d2c73f4a2ca92cb0b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -1574,6 +1574,8 @@ struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw) { struct mlx5_esw_bridge_offloads *br_offloads; + ASSERT_RTNL(); + br_offloads = kvzalloc(sizeof(*br_offloads), GFP_KERNEL); if (!br_offloads) return ERR_PTR(-ENOMEM); @@ -1590,6 +1592,8 @@ void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw) { struct mlx5_esw_bridge_offloads *br_offloads = esw->br_offloads; + ASSERT_RTNL(); + if (!br_offloads) return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h index 3401188e0a6023e337f6e4edcb1406434046e737..51ac24e6ec3c3f95519ce34a760b218185e2d27e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h @@ -21,7 +21,7 @@ DECLARE_EVENT_CLASS(mlx5_esw_bridge_fdb_template, __field(unsigned int, used) ), TP_fast_assign( - strncpy(__entry->dev_name, + strscpy(__entry->dev_name, netdev_name(fdb->dev), IFNAMSIZ); memcpy(__entry->addr, fdb->key.addr, ETH_ALEN); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 0b0234f9d694c47a9235171f905cdd987992db78..84dbe46d5ede63a6c8439e436dd0f806a2d420ec 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -132,7 +132,7 @@ static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; - del_timer(&fw_reset->timer); + del_timer_sync(&fw_reset->timer); } static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c 
b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index d5e47630e2849115dea8028fc39d9d69cc37f933..df58cba37930aeb8b47a9f1a0b8079d60244e604 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -121,12 +121,13 @@ u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains) u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains) { - if (!mlx5_chains_prios_supported(chains)) - return 1; - if (mlx5_chains_ignore_flow_level_supported(chains)) return UINT_MAX; + if (!chains->dev->priv.eswitch || + chains->dev->priv.eswitch->mode != MLX5_ESWITCH_OFFLOADS) + return 1; + /* We should get here only for eswitch case */ return FDB_TC_MAX_PRIO; } @@ -211,7 +212,7 @@ static int create_chain_restore(struct fs_chain *chain) { struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch; - char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)]; + u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; struct mlx5_fs_chains *chains = chain->chains; enum mlx5e_tc_attr_to_reg chain_to_reg; struct mlx5_modify_hdr *mod_hdr; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 1ef2b6a848c1049ab82e6e53ca7ae4b8ea7ca809..7b16a1188aabbd0041edf45f48e70f3e4c16b26d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -406,23 +406,24 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, switch (module_id) { case MLX5_MODULE_ID_SFP: - mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset); + mlx5_sfp_eeprom_params_set(&query.i2c_address, &query.page, &offset); break; case MLX5_MODULE_ID_QSFP: case MLX5_MODULE_ID_QSFP_PLUS: case MLX5_MODULE_ID_QSFP28: - mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &query.offset); + mlx5_qsfp_eeprom_params_set(&query.i2c_address, &query.page, &offset); break; default: mlx5_core_err(dev, "Module ID not recognized: 0x%x\n", module_id); return -EINVAL; } - if (query.offset + size > MLX5_EEPROM_PAGE_LENGTH) + if (offset + size > MLX5_EEPROM_PAGE_LENGTH) /* Cross pages read, read until offset 256 in low page */ - size -= offset + size - MLX5_EEPROM_PAGE_LENGTH; + size = MLX5_EEPROM_PAGE_LENGTH - offset; query.size = size; + query.offset = offset; return mlx5_query_mcia(dev, &query, data); } diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c index ca5f1177963d1e248447c9bc4b702b9411b3c003..ce5970bdcc6a0791242a6663ae8b1e81f7897ad3 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_mac.c @@ -40,11 +40,12 @@ static int lan966x_mac_wait_for_completion(struct lan966x *lan966x) { u32 val; - return readx_poll_timeout(lan966x_mac_get_status, - lan966x, val, - (ANA_MACACCESS_MAC_TABLE_CMD_GET(val)) == - MACACCESS_CMD_IDLE, - TABLE_UPDATE_SLEEP_US, TABLE_UPDATE_TIMEOUT_US); + return readx_poll_timeout_atomic(lan966x_mac_get_status, + lan966x, val, + (ANA_MACACCESS_MAC_TABLE_CMD_GET(val)) == + MACACCESS_CMD_IDLE, + TABLE_UPDATE_SLEEP_US, + TABLE_UPDATE_TIMEOUT_US); } static void lan966x_mac_select(struct lan966x *lan966x, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index 2cb70da63db3c7e1507c43ca6e59a60ea4233771..1f60fd125a1dcf499f49bc6f75ddb4fbfc173253 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ 
b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -182,9 +182,9 @@ static int lan966x_port_inj_ready(struct lan966x *lan966x, u8 grp) { u32 val; - return readx_poll_timeout(lan966x_port_inj_status, lan966x, val, - QS_INJ_STATUS_FIFO_RDY_GET(val) & BIT(grp), - READL_SLEEP_US, READL_TIMEOUT_US); + return readx_poll_timeout_atomic(lan966x_port_inj_status, lan966x, val, + QS_INJ_STATUS_FIFO_RDY_GET(val) & BIT(grp), + READL_SLEEP_US, READL_TIMEOUT_US); } static int lan966x_port_ifh_xmit(struct sk_buff *skb, diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c index 59783fc46a7b9f7898f5738a8329e0b7791aa9fb..10b866e9f7266be5f95ec8695edfcc0ae2a1255f 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c @@ -1103,7 +1103,7 @@ void sparx5_get_stats64(struct net_device *ndev, stats->tx_carrier_errors = portstats[spx5_stats_tx_csense_cnt]; stats->tx_window_errors = portstats[spx5_stats_tx_late_coll_cnt]; stats->rx_dropped = portstats[spx5_stats_ana_ac_port_stat_lsb_cnt]; - for (idx = 0; idx < 2 * SPX5_PRIOS; ++idx, ++stats) + for (idx = 0; idx < 2 * SPX5_PRIOS; ++idx) stats->rx_dropped += portstats[spx5_stats_green_p0_rx_port_drop + idx]; stats->tx_dropped = portstats[spx5_stats_tx_local_drop]; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index dc7e5ea6ec158f9c67b8974767f8be3d735a796e..148d431fcde429f665332a7738fa943921959712 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -145,9 +145,9 @@ static void sparx5_xtr_grp(struct sparx5 *sparx5, u8 grp, bool byte_swap) skb_put(skb, byte_cnt - ETH_FCS_LEN); eth_skb_pad(skb); skb->protocol = eth_type_trans(skb, netdev); - netif_rx(skb); netdev->stats.rx_bytes += skb->len; netdev->stats.rx_packets++; + netif_rx(skb); } static int sparx5_inject(struct sparx5 *sparx5, diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 455293aa6343285555315689634858a7318cd241..e6de86552df0eeb433ca68a7de88210647720c17 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1432,6 +1432,8 @@ static void ocelot_populate_ipv4_ptp_event_trap_key(struct ocelot_vcap_filter *trap) { trap->key_type = OCELOT_VCAP_KEY_IPV4; + trap->key.ipv4.proto.value[0] = IPPROTO_UDP; + trap->key.ipv4.proto.mask[0] = 0xff; trap->key.ipv4.dport.value = PTP_EV_PORT; trap->key.ipv4.dport.mask = 0xffff; } @@ -1440,6 +1442,8 @@ static void ocelot_populate_ipv6_ptp_event_trap_key(struct ocelot_vcap_filter *trap) { trap->key_type = OCELOT_VCAP_KEY_IPV6; + trap->key.ipv4.proto.value[0] = IPPROTO_UDP; + trap->key.ipv4.proto.mask[0] = 0xff; trap->key.ipv6.dport.value = PTP_EV_PORT; trap->key.ipv6.dport.mask = 0xffff; } @@ -1448,6 +1452,8 @@ static void ocelot_populate_ipv4_ptp_general_trap_key(struct ocelot_vcap_filter *trap) { trap->key_type = OCELOT_VCAP_KEY_IPV4; + trap->key.ipv4.proto.value[0] = IPPROTO_UDP; + trap->key.ipv4.proto.mask[0] = 0xff; trap->key.ipv4.dport.value = PTP_GEN_PORT; trap->key.ipv4.dport.mask = 0xffff; } @@ -1456,6 +1462,8 @@ static void ocelot_populate_ipv6_ptp_general_trap_key(struct ocelot_vcap_filter *trap) { trap->key_type = OCELOT_VCAP_KEY_IPV6; + trap->key.ipv4.proto.value[0] = IPPROTO_UDP; + trap->key.ipv4.proto.mask[0] = 0xff; trap->key.ipv6.dport.value = PTP_GEN_PORT; trap->key.ipv6.dport.mask = 
0xffff; } @@ -1737,12 +1745,11 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data) } EXPORT_SYMBOL(ocelot_get_strings); +/* Caller must hold &ocelot->stats_lock */ static void ocelot_update_stats(struct ocelot *ocelot) { int i, j; - mutex_lock(&ocelot->stats_lock); - for (i = 0; i < ocelot->num_phys_ports; i++) { /* Configure the port to read the stats from */ ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG); @@ -1761,8 +1768,6 @@ static void ocelot_update_stats(struct ocelot *ocelot) ~(u64)U32_MAX) + val; } } - - mutex_unlock(&ocelot->stats_lock); } static void ocelot_check_stats_work(struct work_struct *work) @@ -1771,7 +1776,9 @@ static void ocelot_check_stats_work(struct work_struct *work) struct ocelot *ocelot = container_of(del_work, struct ocelot, stats_work); + mutex_lock(&ocelot->stats_lock); ocelot_update_stats(ocelot); + mutex_unlock(&ocelot->stats_lock); queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work, OCELOT_STATS_CHECK_DELAY); @@ -1781,12 +1788,16 @@ void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data) { int i; + mutex_lock(&ocelot->stats_lock); + /* check and update now */ ocelot_update_stats(ocelot); /* Copy all counters */ for (i = 0; i < ocelot->num_stats; i++) *data++ = ocelot->stats[port * ocelot->num_stats + i]; + + mutex_unlock(&ocelot->stats_lock); } EXPORT_SYMBOL(ocelot_get_ethtool_stats); diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index dfb4468fe287ab3144134144d134a5a1fcdf3d66..0a326e04e6923351e2b9180144cfc504a3040c0b 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -1011,6 +1011,7 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, struct nfp_flower_repr_priv *repr_priv; struct nfp_tun_offloaded_mac *entry; struct nfp_repr *repr; + u16 nfp_mac_idx; int ida_idx; entry = nfp_tunnel_lookup_offloaded_macs(app, mac); @@ -1029,8 +1030,6 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, entry->bridge_count--; if (!entry->bridge_count && entry->ref_count) { - u16 nfp_mac_idx; - nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT; if (__nfp_tunnel_offload_mac(app, mac, nfp_mac_idx, false)) { @@ -1046,7 +1045,6 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, /* If MAC is now used by 1 repr set the offloaded MAC index to port. */ if (entry->ref_count == 1 && list_is_singular(&entry->repr_list)) { - u16 nfp_mac_idx; int port, err; repr_priv = list_first_entry(&entry->repr_list, @@ -1074,8 +1072,14 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, WARN_ON_ONCE(rhashtable_remove_fast(&priv->tun.offloaded_macs, &entry->ht_node, offloaded_macs_params)); + + if (nfp_flower_is_supported_bridge(netdev)) + nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT; + else + nfp_mac_idx = entry->index; + /* If MAC has global ID then extract and free the ida entry. 
*/ - if (nfp_tunnel_is_mac_idx_global(entry->index)) { + if (nfp_tunnel_is_mac_idx_global(nfp_mac_idx)) { ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index); ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); } diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c index 16a4cbae93265c0c490a014c7b1446f6d632f7b1..c672f92d65e976bee9ee148c51ce2f9f9b62a110 100644 --- a/drivers/net/ethernet/seeq/ether3.c +++ b/drivers/net/ethernet/seeq/ether3.c @@ -749,6 +749,7 @@ ether3_probe(struct expansion_card *ec, const struct ecard_id *id) const struct ether3_data *data = id->data; struct net_device *dev; int bus_type, ret; + u8 addr[ETH_ALEN]; ether3_banner(); @@ -776,7 +777,8 @@ ether3_probe(struct expansion_card *ec, const struct ecard_id *id) priv(dev)->seeq = priv(dev)->base + data->base_offset; dev->irq = ec->irq; - ether3_addr(dev->dev_addr, ec); + ether3_addr(addr, ec); + eth_hw_addr_set(dev, addr); priv(dev)->dev = dev; timer_setup(&priv(dev)->timer, ether3_ledoff, 0); diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index dd6f69ced4ee31f758681a878b3363a7388b3148..fc9cef9dcefc6d74f3de6e79f403c108a625d406 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -1648,7 +1648,7 @@ static int smc911x_ethtool_geteeprom(struct net_device *dev, return ret; if ((ret=smc911x_ethtool_read_eeprom_byte(dev, &eebuf[i]))!=0) return ret; - } + } memcpy(data, eebuf+eeprom->offset, eeprom->len); return 0; } @@ -1667,11 +1667,11 @@ static int smc911x_ethtool_seteeprom(struct net_device *dev, return ret; /* write byte */ if ((ret=smc911x_ethtool_write_eeprom_byte(dev, *data))!=0) - return ret; + return ret; if ((ret=smc911x_ethtool_write_eeprom_cmd(dev, E2P_CMD_EPC_CMD_WRITE_, i ))!=0) return ret; - } - return 0; + } + return 0; } static int smc911x_ethtool_geteeprom_len(struct net_device *dev) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 617d0e4c64958385c5a65ae21e42befbd219f9fb..09644ab0d87a7d3dc42a07009bb8bd6772f0edd1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -756,7 +756,7 @@ static int sun8i_dwmac_reset(struct stmmac_priv *priv) if (err) { dev_err(priv->device, "EMAC reset timeout\n"); - return -EFAULT; + return err; } return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c index e2e0f977875d7026b030564c3828af022271721f..c3f10a92b62b804294d401c9e4b67618261aa8e4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c @@ -22,21 +22,21 @@ #define ETHER_CLK_SEL_RMII_CLK_EN BIT(2) #define ETHER_CLK_SEL_RMII_CLK_RST BIT(3) #define ETHER_CLK_SEL_DIV_SEL_2 BIT(4) -#define ETHER_CLK_SEL_DIV_SEL_20 BIT(0) +#define ETHER_CLK_SEL_DIV_SEL_20 0 #define ETHER_CLK_SEL_FREQ_SEL_125M (BIT(9) | BIT(8)) #define ETHER_CLK_SEL_FREQ_SEL_50M BIT(9) #define ETHER_CLK_SEL_FREQ_SEL_25M BIT(8) #define ETHER_CLK_SEL_FREQ_SEL_2P5M 0 -#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN BIT(0) +#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN 0 #define ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC BIT(10) #define ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV BIT(11) -#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_IN BIT(0) +#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_IN 0 #define ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC BIT(12) #define ETHER_CLK_SEL_RX_CLK_EXT_SEL_DIV BIT(13) -#define 
ETHER_CLK_SEL_TX_CLK_O_TX_I BIT(0) +#define ETHER_CLK_SEL_TX_CLK_O_TX_I 0 #define ETHER_CLK_SEL_TX_CLK_O_RMII_I BIT(14) #define ETHER_CLK_SEL_TX_O_E_N_IN BIT(15) -#define ETHER_CLK_SEL_RMII_CLK_SEL_IN BIT(0) +#define ETHER_CLK_SEL_RMII_CLK_SEL_IN 0 #define ETHER_CLK_SEL_RMII_CLK_SEL_RX_C BIT(16) #define ETHER_CLK_SEL_RX_TX_CLK_EN (ETHER_CLK_SEL_RX_CLK_EN | ETHER_CLK_SEL_TX_CLK_EN) @@ -49,13 +49,15 @@ struct visconti_eth { void __iomem *reg; u32 phy_intf_sel; struct clk *phy_ref_clk; + struct device *dev; spinlock_t lock; /* lock to protect register update */ }; static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed) { struct visconti_eth *dwmac = priv; - unsigned int val, clk_sel_val; + struct net_device *netdev = dev_get_drvdata(dwmac->dev); + unsigned int val, clk_sel_val = 0; unsigned long flags; spin_lock_irqsave(&dwmac->lock, flags); @@ -85,7 +87,9 @@ static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed) break; default: /* No bit control */ - break; + netdev_err(netdev, "Unsupported speed request (%d)", speed); + spin_unlock_irqrestore(&dwmac->lock, flags); + return; } writel(val, dwmac->reg + MAC_CTRL_REG); @@ -96,31 +100,41 @@ static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed) val |= ETHER_CLK_SEL_TX_O_E_N_IN; writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); + /* Set Clock-Mux, Start clock, Set TX_O direction */ switch (dwmac->phy_intf_sel) { case ETHER_CONFIG_INTF_RGMII: val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC; + writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); + + val |= ETHER_CLK_SEL_RX_TX_CLK_EN; + writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); + + val &= ~ETHER_CLK_SEL_TX_O_E_N_IN; + writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); break; case ETHER_CONFIG_INTF_RMII: val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_DIV | - ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC | ETHER_CLK_SEL_TX_O_E_N_IN | + ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV | ETHER_CLK_SEL_TX_O_E_N_IN | ETHER_CLK_SEL_RMII_CLK_SEL_RX_C; + writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); + + val |= ETHER_CLK_SEL_RMII_CLK_RST; + writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); + + val |= ETHER_CLK_SEL_RMII_CLK_EN | ETHER_CLK_SEL_RX_TX_CLK_EN; + writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); break; case ETHER_CONFIG_INTF_MII: default: val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC | - ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV | ETHER_CLK_SEL_TX_O_E_N_IN | - ETHER_CLK_SEL_RMII_CLK_EN; + ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC | ETHER_CLK_SEL_TX_O_E_N_IN; + writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); + + val |= ETHER_CLK_SEL_RX_TX_CLK_EN; + writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); break; } - /* Start clock */ - writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); - val |= ETHER_CLK_SEL_RX_TX_CLK_EN; - writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); - - val &= ~ETHER_CLK_SEL_TX_O_E_N_IN; - writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL); - spin_unlock_irqrestore(&dwmac->lock, flags); } @@ -219,6 +233,7 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev) spin_lock_init(&dwmac->lock); dwmac->reg = stmmac_res.addr; + dwmac->dev = &pdev->dev; plat_dat->bsp_priv = dwmac; plat_dat->fix_mac_speed = visconti_eth_fix_mac_speed; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h index 1914ad698cab24e4df398cbafd3e40d677a50d02..acd70b9a3173c4202a8f1e80dab51e21cfe3074c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h @@ -150,6 +150,7 @@ #define NUM_DWMAC100_DMA_REGS 9 
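/*
 * Aside on the dwmac-visconti hunk above: the DIV_SEL_20, *_EXT_SEL_IN,
 * TX_CLK_O_TX_I and RMII_CLK_SEL_IN selectors were redefined from BIT(0) to
 * 0. Each of them means "use the default source", i.e. no mux bit set, and
 * encoding that as BIT(0) aliased another flag in the same register (bit 0
 * appears to be the RX clock enable, going by the driver's numbering). A
 * standalone sketch of the defect (names mirror the driver; the values are
 * illustrative only, not the hardware's):
 */
#include <stdio.h>

#define BIT(n)				(1U << (n))
#define ETHER_CLK_SEL_RX_CLK_EN		BIT(0)	/* real flag living at bit 0 */
#define OLD_RMII_CLK_SEL_IN		BIT(0)	/* buggy "default input" encoding */
#define NEW_RMII_CLK_SEL_IN		0	/* fixed: no mux bit set */

int main(void)
{
	unsigned int old_val = OLD_RMII_CLK_SEL_IN;
	unsigned int new_val = NEW_RMII_CLK_SEL_IN;

	/* The buggy define flips the RX clock enable as a side effect. */
	printf("old: val=%#x rx_clk_en=%u\n", old_val,
	       !!(old_val & ETHER_CLK_SEL_RX_CLK_EN));
	printf("new: val=%#x rx_clk_en=%u\n", new_val,
	       !!(new_val & ETHER_CLK_SEL_RX_CLK_EN));
	return 0;
}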
#define NUM_DWMAC1000_DMA_REGS 23 +#define NUM_DWMAC4_DMA_REGS 27 void dwmac_enable_dma_transmission(void __iomem *ioaddr); void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 40b5ed94cb54ac2e2971fad161e925f21e15354d..5b195d5051d630593a6502347590bd1bdb282279 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -194,7 +194,6 @@ struct stmmac_priv { u32 tx_coal_timer[MTL_MAX_TX_QUEUES]; u32 rx_coal_frames[MTL_MAX_TX_QUEUES]; - int tx_coalesce; int hwts_tx_en; bool tx_path_in_lpi_mode; bool tso; @@ -229,7 +228,6 @@ struct stmmac_priv { unsigned int flow_ctrl; unsigned int pause; struct mii_bus *mii; - int mii_irq[PHY_MAX_ADDR]; struct phylink_config phylink_config; struct phylink *phylink; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 164dff5ec32e79e62b17d56cdd3ac8bcfcbb9b40..abfb3cd5958dfe353c184a98c07532c9ddb2753d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -21,10 +21,18 @@ #include "dwxgmac2.h" #define REG_SPACE_SIZE 0x1060 +#define GMAC4_REG_SPACE_SIZE 0x116C #define MAC100_ETHTOOL_NAME "st_mac100" #define GMAC_ETHTOOL_NAME "st_gmac" #define XGMAC_ETHTOOL_NAME "st_xgmac" +/* Same as DMA_CHAN_BASE_ADDR defined in dwmac4_dma.h + * + * It is here because dwmac_dma.h and dwmac4_dam.h can not be included at the + * same time due to the conflicting macro names. + */ +#define GMAC4_DMA_CHAN_BASE_ADDR 0x00001100 + #define ETHTOOL_DMA_OFFSET 55 struct stmmac_stats { @@ -434,6 +442,8 @@ static int stmmac_ethtool_get_regs_len(struct net_device *dev) if (priv->plat->has_xgmac) return XGMAC_REGSIZE * 4; + else if (priv->plat->has_gmac4) + return GMAC4_REG_SPACE_SIZE; return REG_SPACE_SIZE; } @@ -446,8 +456,13 @@ static void stmmac_ethtool_gregs(struct net_device *dev, stmmac_dump_mac_regs(priv, priv->hw, reg_space); stmmac_dump_dma_regs(priv, priv->ioaddr, reg_space); - if (!priv->plat->has_xgmac) { - /* Copy DMA registers to where ethtool expects them */ + /* Copy DMA registers to where ethtool expects them */ + if (priv->plat->has_gmac4) { + /* GMAC4 dumps its DMA registers at its DMA_CHAN_BASE_ADDR */ + memcpy(®_space[ETHTOOL_DMA_OFFSET], + ®_space[GMAC4_DMA_CHAN_BASE_ADDR / 4], + NUM_DWMAC4_DMA_REGS * 4); + } else if (!priv->plat->has_xgmac) { memcpy(®_space[ETHTOOL_DMA_OFFSET], ®_space[DMA_BUS_MODE / 4], NUM_DWMAC1000_DMA_REGS * 4); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index 074e2cdfb0fa6d3b1c944998d66b3306a88e51b0..a7ec9f4d46ced331065a22bc93dddf082f8c3bed 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -145,15 +145,20 @@ static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, static void get_systime(void __iomem *ioaddr, u64 *systime) { - u64 ns; - - /* Get the TSSS value */ - ns = readl(ioaddr + PTP_STNSR); - /* Get the TSS and convert sec time value to nanosecond */ - ns += readl(ioaddr + PTP_STSR) * 1000000000ULL; + u64 ns, sec0, sec1; + + /* Get the TSS value */ + sec1 = readl_relaxed(ioaddr + PTP_STSR); + do { + sec0 = sec1; + /* Get the TSSS value */ + ns = readl_relaxed(ioaddr + PTP_STNSR); + /* Get the TSS value */ + sec1 = readl_relaxed(ioaddr + PTP_STSR); + 
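		/* Re-sample if the seconds counter ticked while the
		 * nanoseconds register was being read; on exit, ns is
		 * coherent with sec1.
		 */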
} while (sec0 != sec1); if (systime) - *systime = ns; + *systime = ns + (sec1 * 1000000000ULL); } static void get_ptptime(void __iomem *ptpaddr, u64 *ptp_time) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6708ca2aa4f73dfdc0ef130ae41d004210b30ee7..bde76ea2deecf0ac927a1577a34a048efe2214a9 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -402,7 +402,7 @@ static void stmmac_lpi_entry_timer_config(struct stmmac_priv *priv, bool en) * Description: this function is to verify and enter in LPI mode in case of * EEE. */ -static void stmmac_enable_eee_mode(struct stmmac_priv *priv) +static int stmmac_enable_eee_mode(struct stmmac_priv *priv) { u32 tx_cnt = priv->plat->tx_queues_to_use; u32 queue; @@ -412,13 +412,14 @@ static void stmmac_enable_eee_mode(struct stmmac_priv *priv) struct stmmac_tx_queue *tx_q = &priv->tx_queue[queue]; if (tx_q->dirty_tx != tx_q->cur_tx) - return; /* still unfinished work */ + return -EBUSY; /* still unfinished work */ } /* Check and enter in LPI mode */ if (!priv->tx_path_in_lpi_mode) stmmac_set_eee_mode(priv, priv->hw, priv->plat->en_tx_lpi_clockgating); + return 0; } /** @@ -450,8 +451,8 @@ static void stmmac_eee_ctrl_timer(struct timer_list *t) { struct stmmac_priv *priv = from_timer(priv, t, eee_ctrl_timer); - stmmac_enable_eee_mode(priv); - mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); + if (stmmac_enable_eee_mode(priv)) + mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); } /** @@ -889,6 +890,9 @@ static int stmmac_init_ptp(struct stmmac_priv *priv) bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; int ret; + if (priv->plat->ptp_clk_freq_config) + priv->plat->ptp_clk_freq_config(priv); + ret = stmmac_init_tstamp_counter(priv, STMMAC_HWTS_ACTIVE); if (ret) return ret; @@ -911,8 +915,6 @@ static int stmmac_init_ptp(struct stmmac_priv *priv) priv->hwts_tx_en = 0; priv->hwts_rx_en = 0; - stmmac_ptp_register(priv); - return 0; } @@ -2647,8 +2649,8 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue) if (priv->eee_enabled && !priv->tx_path_in_lpi_mode && priv->eee_sw_timer_en) { - stmmac_enable_eee_mode(priv); - mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); + if (stmmac_enable_eee_mode(priv)) + mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(priv->tx_lpi_timer)); } /* We still have pending packets, let's call for a new scheduling */ @@ -3238,7 +3240,7 @@ static int stmmac_fpe_start_wq(struct stmmac_priv *priv) /** * stmmac_hw_setup - setup mac in a usable state. * @dev : pointer to the device structure. - * @init_ptp: initialize PTP if set + * @ptp_register: register PTP if set * Description: * this is the main function to setup the HW in a usable state because the * dma engine is reset, the core registers are configured (e.g. AXI, @@ -3248,7 +3250,7 @@ static int stmmac_fpe_start_wq(struct stmmac_priv *priv) * 0 on success and an appropriate (-)ve integer as defined in errno.h * file on failure. 
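 * Note that stmmac_init_ptp() now always runs from here and also programs
 * the PTP clock source via ptp_clk_freq_config; @ptp_register only gates
 * the stmmac_ptp_register() call, so callers such as the resume path can
 * re-initialise timestamping without registering the PTP clock again.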
*/ -static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) +static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) { struct stmmac_priv *priv = netdev_priv(dev); u32 rx_cnt = priv->plat->rx_queues_to_use; @@ -3305,13 +3307,13 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) stmmac_mmc_setup(priv); - if (init_ptp) { - ret = stmmac_init_ptp(priv); - if (ret == -EOPNOTSUPP) - netdev_warn(priv->dev, "PTP not supported by HW\n"); - else if (ret) - netdev_warn(priv->dev, "PTP init failed\n"); - } + ret = stmmac_init_ptp(priv); + if (ret == -EOPNOTSUPP) + netdev_warn(priv->dev, "PTP not supported by HW\n"); + else if (ret) + netdev_warn(priv->dev, "PTP init failed\n"); + else if (ptp_register) + stmmac_ptp_register(priv); priv->eee_tw_timer = STMMAC_DEFAULT_TWT_LS; @@ -7250,6 +7252,10 @@ int stmmac_dvr_remove(struct device *dev) netdev_info(priv->dev, "%s: removing driver", __func__); + pm_runtime_get_sync(dev); + pm_runtime_disable(dev); + pm_runtime_put_noidle(dev); + stmmac_stop_all_dma(priv); stmmac_mac_set(priv, priv->ioaddr, false); netif_carrier_off(ndev); @@ -7268,8 +7274,6 @@ int stmmac_dvr_remove(struct device *dev) if (priv->plat->stmmac_rst) reset_control_assert(priv->plat->stmmac_rst); reset_control_assert(priv->plat->stmmac_ahb_rst); - pm_runtime_put(dev); - pm_runtime_disable(dev); if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) stmmac_mdio_unregister(ndev); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 0d24ebd37873128a3199a88a43af0c629b901578..1c9f02f9c3175863f31c1426782236718e29dfa4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -297,9 +297,6 @@ void stmmac_ptp_register(struct stmmac_priv *priv) { int i; - if (priv->plat->ptp_clk_freq_config) - priv->plat->ptp_clk_freq_config(priv); - for (i = 0; i < priv->dma_cap.pps_out_num; i++) { if (i >= STMMAC_PPS_MAX) break; diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index ba220593e6db72e80ba34a8b45bdc8d786f87806..8f6817f346baff5a7c83993c5ec515cc00c748d2 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -1146,7 +1146,7 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv) static struct page_pool *cpsw_create_page_pool(struct cpsw_common *cpsw, int size) { - struct page_pool_params pp_params; + struct page_pool_params pp_params = {}; struct page_pool *pool; pp_params.order = 0; diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index cf0917b29e3003368567b9b32c4c4c372159d349..5251fc32422192916954a398d3c3456a9f497f5e 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -1091,20 +1091,22 @@ static int tsi108_get_mac(struct net_device *dev) struct tsi108_prv_data *data = netdev_priv(dev); u32 word1 = TSI_READ(TSI108_MAC_ADDR1); u32 word2 = TSI_READ(TSI108_MAC_ADDR2); + u8 addr[ETH_ALEN]; /* Note that the octets are reversed from what the manual says, * producing an even weirder ordering... 
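 * The address is staged in a local buffer and committed with
 * eth_hw_addr_set(), since drivers are no longer supposed to write
 * net_device::dev_addr directly.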
*/ if (word2 == 0 && word1 == 0) { - dev->dev_addr[0] = 0x00; - dev->dev_addr[1] = 0x06; - dev->dev_addr[2] = 0xd2; - dev->dev_addr[3] = 0x00; - dev->dev_addr[4] = 0x00; + addr[0] = 0x00; + addr[1] = 0x06; + addr[2] = 0xd2; + addr[3] = 0x00; + addr[4] = 0x00; if (0x8 == data->phy) - dev->dev_addr[5] = 0x01; + addr[5] = 0x01; else - dev->dev_addr[5] = 0x02; + addr[5] = 0x02; + eth_hw_addr_set(dev, addr); word2 = (dev->dev_addr[0] << 16) | (dev->dev_addr[1] << 24); @@ -1114,12 +1116,13 @@ static int tsi108_get_mac(struct net_device *dev) TSI_WRITE(TSI108_MAC_ADDR1, word1); TSI_WRITE(TSI108_MAC_ADDR2, word2); } else { - dev->dev_addr[0] = (word2 >> 16) & 0xff; - dev->dev_addr[1] = (word2 >> 24) & 0xff; - dev->dev_addr[2] = (word1 >> 0) & 0xff; - dev->dev_addr[3] = (word1 >> 8) & 0xff; - dev->dev_addr[4] = (word1 >> 16) & 0xff; - dev->dev_addr[5] = (word1 >> 24) & 0xff; + addr[0] = (word2 >> 16) & 0xff; + addr[1] = (word2 >> 24) & 0xff; + addr[2] = (word1 >> 0) & 0xff; + addr[3] = (word1 >> 8) & 0xff; + addr[4] = (word1 >> 16) & 0xff; + addr[5] = (word1 >> 24) & 0xff; + eth_hw_addr_set(dev, addr); } if (!is_valid_ether_addr(dev->dev_addr)) { @@ -1136,14 +1139,12 @@ static int tsi108_set_mac(struct net_device *dev, void *addr) { struct tsi108_prv_data *data = netdev_priv(dev); u32 word1, word2; - int i; if (!is_valid_ether_addr(addr)) return -EADDRNOTAVAIL; - for (i = 0; i < 6; i++) - /* +2 is for the offset of the HW addr type */ - dev->dev_addr[i] = ((unsigned char *)addr)[i + 2]; + /* +2 is for the offset of the HW addr type */ + eth_hw_addr_set(dev, ((unsigned char *)addr) + 2); word2 = (dev->dev_addr[0] << 16) | (dev->dev_addr[1] << 24); diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 6376b8485976cf454ebf9e8a4939c708f3116bd1..980f2be32f05a1974c13aba7d44f257901ed8cd6 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -950,9 +950,7 @@ static int yam_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __ ym = memdup_user(data, sizeof(struct yamdrv_ioctl_mcs)); if (IS_ERR(ym)) return PTR_ERR(ym); - if (ym->cmd != SIOCYAMSMCS) - return -EINVAL; - if (ym->bitrate > YAM_MAXBITRATE) { + if (ym->cmd != SIOCYAMSMCS || ym->bitrate > YAM_MAXBITRATE) { kfree(ym); return -EINVAL; } diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 7d67f41387f5577959839f902128b1777b4b51ee..4f5ef8a9a9a8796db21f270bbb707eb5204e10f1 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -100,6 +100,7 @@ struct at86rf230_local { unsigned long cal_timeout; bool is_tx; bool is_tx_from_off; + bool was_tx; u8 tx_retry; struct sk_buff *tx_skb; struct at86rf230_state_change tx; @@ -343,7 +344,11 @@ at86rf230_async_error_recover_complete(void *context) if (ctx->free) kfree(ctx); - ieee802154_wake_queue(lp->hw); + if (lp->was_tx) { + lp->was_tx = 0; + dev_kfree_skb_any(lp->tx_skb); + ieee802154_wake_queue(lp->hw); + } } static void @@ -352,7 +357,11 @@ at86rf230_async_error_recover(void *context) struct at86rf230_state_change *ctx = context; struct at86rf230_local *lp = ctx->lp; - lp->is_tx = 0; + if (lp->is_tx) { + lp->was_tx = 1; + lp->is_tx = 0; + } + at86rf230_async_state_change(lp, ctx, STATE_RX_AACK_ON, at86rf230_async_error_recover_complete); } diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index ece6ff6049f66b9d9ffaec53b696adb0235e836c..f3438d3e104acb77cd4183cab3b8fc87901b76d9 100644 --- a/drivers/net/ieee802154/ca8210.c +++ 
b/drivers/net/ieee802154/ca8210.c @@ -1771,6 +1771,7 @@ static int ca8210_async_xmit_complete( status ); if (status != MAC_TRANSACTION_OVERFLOW) { + dev_kfree_skb_any(priv->tx_skb); ieee802154_wake_queue(priv->hw); return 0; } diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index 8caa61ec718f503d0731262446a88805ff5e06f3..36f1c5aa98fc6dc3815d15f4af1c776fbd5ec045 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -786,6 +786,7 @@ static int hwsim_add_one(struct genl_info *info, struct device *dev, goto err_pib; } + pib->channel = 13; rcu_assign_pointer(phy->pib, pib); phy->idx = idx; INIT_LIST_HEAD(&phy->edges); diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 8dc04e2590b18b1b41239c4ebf8ac1382942a8d5..383231b85464252a9dfc6a20f938247e5b546959 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -976,8 +976,8 @@ static void mcr20a_hw_setup(struct mcr20a_local *lp) dev_dbg(printdev(lp), "%s\n", __func__); phy->symbol_duration = 16; - phy->lifs_period = 40; - phy->sifs_period = 12; + phy->lifs_period = 40 * phy->symbol_duration; + phy->sifs_period = 12 * phy->symbol_duration; hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AFILT | diff --git a/drivers/net/ipa/ipa_power.c b/drivers/net/ipa/ipa_power.c index b1c6c0fcb654f69e363af2e497a0d75fe87a02b2..f2989aac47a62f5944560fddb0465e3b3d7e7671 100644 --- a/drivers/net/ipa/ipa_power.c +++ b/drivers/net/ipa/ipa_power.c @@ -11,6 +11,8 @@ #include #include +#include "linux/soc/qcom/qcom_aoss.h" + #include "ipa.h" #include "ipa_power.h" #include "ipa_endpoint.h" @@ -64,6 +66,7 @@ enum ipa_power_flag { * struct ipa_power - IPA power management information * @dev: IPA device pointer * @core: IPA core clock + * @qmp: QMP handle for AOSS communication * @spinlock: Protects modem TX queue enable/disable * @flags: Boolean state flags * @interconnect_count: Number of elements in interconnect[] @@ -72,6 +75,7 @@ enum ipa_power_flag { struct ipa_power { struct device *dev; struct clk *core; + struct qmp *qmp; spinlock_t spinlock; /* used with STOPPED/STARTED power flags */ DECLARE_BITMAP(flags, IPA_POWER_FLAG_COUNT); u32 interconnect_count; @@ -382,6 +386,47 @@ void ipa_power_modem_queue_active(struct ipa *ipa) clear_bit(IPA_POWER_FLAG_STARTED, ipa->power->flags); } +static int ipa_power_retention_init(struct ipa_power *power) +{ + struct qmp *qmp = qmp_get(power->dev); + + if (IS_ERR(qmp)) { + if (PTR_ERR(qmp) == -EPROBE_DEFER) + return -EPROBE_DEFER; + + /* We assume any other error means it's not defined/needed */ + qmp = NULL; + } + power->qmp = qmp; + + return 0; +} + +static void ipa_power_retention_exit(struct ipa_power *power) +{ + qmp_put(power->qmp); + power->qmp = NULL; +} + +/* Control register retention on power collapse */ +void ipa_power_retention(struct ipa *ipa, bool enable) +{ + static const char fmt[] = "{ class: bcm, res: ipa_pc, val: %c }"; + struct ipa_power *power = ipa->power; + char buf[36]; /* Exactly enough for fmt[]; size a multiple of 4 */ + int ret; + + if (!power->qmp) + return; /* Not needed on this platform */ + + (void)snprintf(buf, sizeof(buf), fmt, enable ? '1' : '0'); + + ret = qmp_send(power->qmp, buf, sizeof(buf)); + if (ret) + dev_err(power->dev, "error %d sending QMP %sable request\n", + ret, enable ? 
"en" : "dis"); +} + int ipa_power_setup(struct ipa *ipa) { int ret; @@ -438,12 +483,18 @@ ipa_power_init(struct device *dev, const struct ipa_power_data *data) if (ret) goto err_kfree; + ret = ipa_power_retention_init(power); + if (ret) + goto err_interconnect_exit; + pm_runtime_set_autosuspend_delay(dev, IPA_AUTOSUSPEND_DELAY); pm_runtime_use_autosuspend(dev); pm_runtime_enable(dev); return power; +err_interconnect_exit: + ipa_interconnect_exit(power); err_kfree: kfree(power); err_clk_put: @@ -460,6 +511,7 @@ void ipa_power_exit(struct ipa_power *power) pm_runtime_disable(dev); pm_runtime_dont_use_autosuspend(dev); + ipa_power_retention_exit(power); ipa_interconnect_exit(power); kfree(power); clk_put(clk); diff --git a/drivers/net/ipa/ipa_power.h b/drivers/net/ipa/ipa_power.h index 2151805d7fbb0fa40dec0a0984827fb152a6d6f1..6f84f057a2095b92d869ba0c8d626ab85b9c9c34 100644 --- a/drivers/net/ipa/ipa_power.h +++ b/drivers/net/ipa/ipa_power.h @@ -40,6 +40,13 @@ void ipa_power_modem_queue_wake(struct ipa *ipa); */ void ipa_power_modem_queue_active(struct ipa *ipa); +/** + * ipa_power_retention() - Control register retention on power collapse + * @ipa: IPA pointer + * @enable: Whether retention should be enabled or disabled + */ +void ipa_power_retention(struct ipa *ipa, bool enable); + /** * ipa_power_setup() - Set up IPA power management * @ipa: IPA pointer diff --git a/drivers/net/ipa/ipa_uc.c b/drivers/net/ipa/ipa_uc.c index 856e55a080a7f07ee2024b3d1932ba73893a8400..fe11910518d95f1ee352eb1ce1cf8cdb60d91c45 100644 --- a/drivers/net/ipa/ipa_uc.c +++ b/drivers/net/ipa/ipa_uc.c @@ -11,6 +11,7 @@ #include "ipa.h" #include "ipa_uc.h" +#include "ipa_power.h" /** * DOC: The IPA embedded microcontroller @@ -154,6 +155,7 @@ static void ipa_uc_response_hdlr(struct ipa *ipa, enum ipa_irq_id irq_id) case IPA_UC_RESPONSE_INIT_COMPLETED: if (ipa->uc_powered) { ipa->uc_loaded = true; + ipa_power_retention(ipa, true); pm_runtime_mark_last_busy(dev); (void)pm_runtime_put_autosuspend(dev); ipa->uc_powered = false; @@ -184,6 +186,9 @@ void ipa_uc_deconfig(struct ipa *ipa) ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_1); ipa_interrupt_remove(ipa->interrupt, IPA_IRQ_UC_0); + if (ipa->uc_loaded) + ipa_power_retention(ipa, false); + if (!ipa->uc_powered) return; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 16aa3a478e9e8d15ed14e3d62ca3a1e539e99993..3d08743317634044d887e48336050cbf5022ac68 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3870,6 +3870,18 @@ static void macsec_common_dellink(struct net_device *dev, struct list_head *head struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; + /* If h/w offloading is available, propagate to the device */ + if (macsec_is_offloaded(macsec)) { + const struct macsec_ops *ops; + struct macsec_context ctx; + + ops = macsec_get_ops(netdev_priv(dev), &ctx); + if (ops) { + ctx.secy = &macsec->secy; + macsec_offload(ops->mdo_del_secy, &ctx); + } + } + unregister_netdevice_queue(dev, head); list_del_rcu(&macsec->secys); macsec_del_dev(macsec); @@ -3884,18 +3896,6 @@ static void macsec_dellink(struct net_device *dev, struct list_head *head) struct net_device *real_dev = macsec->real_dev; struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev); - /* If h/w offloading is available, propagate to the device */ - if (macsec_is_offloaded(macsec)) { - const struct macsec_ops *ops; - struct macsec_context ctx; - - ops = macsec_get_ops(netdev_priv(dev), &ctx); - if (ops) { - ctx.secy = &macsec->secy; - 
macsec_offload(ops->mdo_del_secy, &ctx); - } - } - macsec_common_dellink(dev, head); if (list_empty(&rxd->secys)) { @@ -4018,6 +4018,15 @@ static int macsec_newlink(struct net *net, struct net_device *dev, !macsec_check_offload(macsec->offload, macsec)) return -EOPNOTSUPP; + /* send_sci must be set to true when transmit sci explicitly is set */ + if ((data && data[IFLA_MACSEC_SCI]) && + (data && data[IFLA_MACSEC_INC_SCI])) { + u8 send_sci = !!nla_get_u8(data[IFLA_MACSEC_INC_SCI]); + + if (!send_sci) + return -EINVAL; + } + if (data && data[IFLA_MACSEC_ICV_LEN]) icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]); mtu = real_dev->mtu - icv_len - macsec_extra_len(true); diff --git a/drivers/net/mdio/mdio-aspeed.c b/drivers/net/mdio/mdio-aspeed.c index 966c3b4ad59d14e74bc113680e0fbff231d296d2..e2273588c75b6f3dd212269611d7f41a8769ed16 100644 --- a/drivers/net/mdio/mdio-aspeed.c +++ b/drivers/net/mdio/mdio-aspeed.c @@ -148,6 +148,7 @@ static const struct of_device_id aspeed_mdio_of_match[] = { { .compatible = "aspeed,ast2600-mdio", }, { }, }; +MODULE_DEVICE_TABLE(of, aspeed_mdio_of_match); static struct platform_driver aspeed_mdio_driver = { .driver = { diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index 5b6c0d120e09e2b00347c230dad696d884f86b83..29aa811af430f0d03ad94f8c8b62a81292929948 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -1688,19 +1688,19 @@ static int qca808x_read_status(struct phy_device *phydev) if (ret < 0) return ret; - if (phydev->link && phydev->speed == SPEED_2500) - phydev->interface = PHY_INTERFACE_MODE_2500BASEX; - else - phydev->interface = PHY_INTERFACE_MODE_SMII; - - /* generate seed as a lower random value to make PHY linked as SLAVE easily, - * except for master/slave configuration fault detected. - * the reason for not putting this code into the function link_change_notify is - * the corner case where the link partner is also the qca8081 PHY and the seed - * value is configured as the same value, the link can't be up and no link change - * occurs. - */ - if (!phydev->link) { + if (phydev->link) { + if (phydev->speed == SPEED_2500) + phydev->interface = PHY_INTERFACE_MODE_2500BASEX; + else + phydev->interface = PHY_INTERFACE_MODE_SGMII; + } else { + /* generate seed as a lower random value to make PHY linked as SLAVE easily, + * except for master/slave configuration fault detected. + * the reason for not putting this code into the function link_change_notify is + * the corner case where the link partner is also the qca8081 PHY and the seed + * value is configured as the same value, the link can't be up and no link change + * occurs. 
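+ * Hooking the seeding into read_status() instead means it reruns on every
+ * poll while the link is down, so two identical peers keep drawing fresh
+ * seeds until the master/slave negotiation resolves.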
+ */ if (phydev->master_slave_state == MASTER_SLAVE_STATE_ERR) { qca808x_phy_ms_seed_enable(phydev, false); } else { diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index bb5104ae46104c7a13a7749578839ec9abe2cd5a..3c683e0e40e9e6f01b0c429efdc2546a37d91edf 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -854,6 +854,7 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM54616S", /* PHY_GBIT_FEATURES */ + .soft_reset = genphy_soft_reset, .config_init = bcm54xx_config_init, .config_aneg = bcm54616s_config_aneg, .config_intr = bcm_phy_config_intr, diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index fa71fb7a66b5706558db85f76bef2615e6735da6..2429db614b59ab4dfc91b4053edfebbff6effb8a 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -553,9 +553,9 @@ static int m88e1121_config_aneg_rgmii_delays(struct phy_device *phydev) else mscr = 0; - return phy_modify_paged(phydev, MII_MARVELL_MSCR_PAGE, - MII_88E1121_PHY_MSCR_REG, - MII_88E1121_PHY_MSCR_DELAY_MASK, mscr); + return phy_modify_paged_changed(phydev, MII_MARVELL_MSCR_PAGE, + MII_88E1121_PHY_MSCR_REG, + MII_88E1121_PHY_MSCR_DELAY_MASK, mscr); } static int m88e1121_config_aneg(struct phy_device *phydev) @@ -569,11 +569,13 @@ static int m88e1121_config_aneg(struct phy_device *phydev) return err; } + changed = err; + err = marvell_set_polarity(phydev, phydev->mdix_ctrl); if (err < 0) return err; - changed = err; + changed |= err; err = genphy_config_aneg(phydev); if (err < 0) @@ -1213,16 +1215,15 @@ static int m88e1118_config_aneg(struct phy_device *phydev) { int err; - err = genphy_soft_reset(phydev); + err = marvell_set_polarity(phydev, phydev->mdix_ctrl); if (err < 0) return err; - err = marvell_set_polarity(phydev, phydev->mdix_ctrl); + err = genphy_config_aneg(phydev); if (err < 0) return err; - err = genphy_config_aneg(phydev); - return 0; + return genphy_soft_reset(phydev); } static int m88e1118_config_init(struct phy_device *phydev) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 74d8e1dc125f80c223cdb4c0688eed025dc8b700..ce0bb5951b81e356aba0322ac2fb752e48eaf3bd 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1746,6 +1746,9 @@ void phy_detach(struct phy_device *phydev) phy_driver_is_genphy_10g(phydev)) device_release_driver(&phydev->mdio.dev); + /* Assert the reset signal */ + phy_device_reset(phydev, 1); + /* * The phydev might go away on the put_device() below, so avoid * a use-after-free bug by reading the underlying bus first. 
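 * (The two hunks here move that reset assertion from the tail of
 *  phy_detach() up to this point, ahead of the module reference drop and
 *  the subsequent put_device() that may free the phydev.)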
@@ -1757,9 +1760,6 @@ void phy_detach(struct phy_device *phydev) ndev_owner = dev->dev.parent->driver->owner; if (ndev_owner != bus->owner) module_put(bus->owner); - - /* Assert the reset signal */ - phy_device_reset(phydev, 1); } EXPORT_SYMBOL(phy_detach); diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 0c6c0d1843bce34023cb8211401f159a8c148619..c1512c9925a661c0853afd78d8da308c6318f514 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -651,6 +651,11 @@ struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode) else if (ret < 0) return ERR_PTR(ret); + if (!fwnode_device_is_available(ref.fwnode)) { + fwnode_handle_put(ref.fwnode); + return NULL; + } + bus = sfp_bus_get(ref.fwnode); fwnode_handle_put(ref.fwnode); if (!bus) diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 1a627ba4b850212c32bd7b7c1703dcd4a0472059..a31098981a652eb8f06c05f604e233e46b1f52c5 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1468,58 +1468,68 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) u16 hdr_off; u32 *pkt_hdr; - /* This check is no longer done by usbnet */ - if (skb->len < dev->net->hard_header_len) + /* At the end of the SKB, there's a header telling us how many packets + * are bundled into this buffer and where we can find an array of + * per-packet metadata (which contains elements encoded into u16). + */ + if (skb->len < 4) return 0; - skb_trim(skb, skb->len - 4); rx_hdr = get_unaligned_le32(skb_tail_pointer(skb)); - pkt_cnt = (u16)rx_hdr; hdr_off = (u16)(rx_hdr >> 16); + + if (pkt_cnt == 0) + return 0; + + /* Make sure that the bounds of the metadata array are inside the SKB + * (and in front of the counter at the end). 
+ */ + if (pkt_cnt * 2 + hdr_off > skb->len) + return 0; pkt_hdr = (u32 *)(skb->data + hdr_off); - while (pkt_cnt--) { + /* Packets must not overlap the metadata array */ + skb_trim(skb, hdr_off); + + for (; ; pkt_cnt--, pkt_hdr++) { u16 pkt_len; le32_to_cpus(pkt_hdr); pkt_len = (*pkt_hdr >> 16) & 0x1fff; - /* Check CRC or runt packet */ - if ((*pkt_hdr & AX_RXHDR_CRC_ERR) || - (*pkt_hdr & AX_RXHDR_DROP_ERR)) { - skb_pull(skb, (pkt_len + 7) & 0xFFF8); - pkt_hdr++; - continue; - } - - if (pkt_cnt == 0) { - skb->len = pkt_len; - /* Skip IP alignment pseudo header */ - skb_pull(skb, 2); - skb_set_tail_pointer(skb, skb->len); - skb->truesize = pkt_len + sizeof(struct sk_buff); - ax88179_rx_checksum(skb, pkt_hdr); - return 1; - } + if (pkt_len > skb->len) + return 0; - ax_skb = skb_clone(skb, GFP_ATOMIC); - if (ax_skb) { + /* Check CRC or runt packet */ + if (((*pkt_hdr & (AX_RXHDR_CRC_ERR | AX_RXHDR_DROP_ERR)) == 0) && + pkt_len >= 2 + ETH_HLEN) { + bool last = (pkt_cnt == 0); + + if (last) { + ax_skb = skb; + } else { + ax_skb = skb_clone(skb, GFP_ATOMIC); + if (!ax_skb) + return 0; + } ax_skb->len = pkt_len; /* Skip IP alignment pseudo header */ skb_pull(ax_skb, 2); skb_set_tail_pointer(ax_skb, ax_skb->len); ax_skb->truesize = pkt_len + sizeof(struct sk_buff); ax88179_rx_checksum(ax_skb, pkt_hdr); + + if (last) + return 1; + usbnet_skb_return(dev, ax_skb); - } else { - return 0; } - skb_pull(skb, (pkt_len + 7) & 0xFFF8); - pkt_hdr++; + /* Trim this packet away from the SKB */ + if (!skb_pull(skb, (pkt_len + 7) & 0xFFF8)) + return 0; } - return 1; } static struct sk_buff * diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index cd33955df0b65f52ac16b9f7b689956fd6c1c92d..6a769df0b4213cc4cedb0aecc58910ac82b63b22 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -121,7 +121,7 @@ static int ipheth_alloc_urbs(struct ipheth_device *iphone) if (tx_buf == NULL) goto free_rx_urb; - rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE, + rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, GFP_KERNEL, &rx_urb->transfer_dma); if (rx_buf == NULL) goto free_tx_buf; @@ -146,7 +146,7 @@ error_nomem: static void ipheth_free_urbs(struct ipheth_device *iphone) { - usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->rx_buf, + usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, iphone->rx_buf, iphone->rx_urb->transfer_dma); usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->tx_buf, iphone->tx_urb->transfer_dma); @@ -317,7 +317,7 @@ static int ipheth_rx_submit(struct ipheth_device *dev, gfp_t mem_flags) usb_fill_bulk_urb(dev->rx_urb, udev, usb_rcvbulkpipe(udev, dev->bulk_in), - dev->rx_buf, IPHETH_BUF_SIZE, + dev->rx_buf, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, ipheth_rcvbulk_callback, dev); dev->rx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 37e5f3495362946da2f9910ff5e12a40b5dab957..3353e761016d715949a013189a7a64a1aa54c748 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1400,6 +1400,8 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */ {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e preproduction config */ {QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/ + {QMI_FIXED_INTF(0x413c, 0x81e4, 0)}, /* Dell Wireless 5829e with eSIM support*/ + {QMI_FIXED_INTF(0x413c, 0x81e6, 0)}, /* Dell Wireless 5829e */ 
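/*
 * (On the ax88179_rx_fixup() rewrite further up: the old code trusted the
 * trailing rx_hdr unconditionally. The new code rejects a zero packet
 * count, verifies that the per-packet metadata array fits inside the SKB,
 * trims the payload to hdr_off so no packet can overlap the metadata, and
 * bounds each pkt_len against what is left of the SKB.)
 */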
{QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ {QMI_QUIRK_SET_DTR(0x22de, 0x9051, 2)}, /* Hucom Wireless HM-211S/K */ diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 354a963075c5f15d194152ca9b38c59c66763e94..d29fb9759cc9557d62370f016fc160f3dbd16ce3 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -265,9 +265,10 @@ static void __veth_xdp_flush(struct veth_rq *rq) { /* Write ptr_ring before reading rx_notify_masked */ smp_mb(); - if (!rq->rx_notify_masked) { - rq->rx_notify_masked = true; - napi_schedule(&rq->xdp_napi); + if (!READ_ONCE(rq->rx_notify_masked) && + napi_schedule_prep(&rq->xdp_napi)) { + WRITE_ONCE(rq->rx_notify_masked, true); + __napi_schedule(&rq->xdp_napi); } } @@ -912,8 +913,10 @@ static int veth_poll(struct napi_struct *napi, int budget) /* Write rx_notify_masked before reading ptr_ring */ smp_store_mb(rq->rx_notify_masked, false); if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) { - rq->rx_notify_masked = true; - napi_schedule(&rq->xdp_napi); + if (napi_schedule_prep(&rq->xdp_napi)) { + WRITE_ONCE(rq->rx_notify_masked, true); + __napi_schedule(&rq->xdp_napi); + } } } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 3d97f158ec5978c916aa385ba26c60e16bdc5193..a801ea40908ffc9fa78df5f3bd6cb9e6821332f0 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2101,7 +2101,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi) stragglers = num_cpu >= vi->curr_queue_pairs ? num_cpu % vi->curr_queue_pairs : 0; - cpu = cpumask_next(-1, cpu_online_mask); + cpu = cpumask_first(cpu_online_mask); for (i = 0; i < vi->curr_queue_pairs; i++) { group_size = stride + (i < stragglers ? 
1 : 0); diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c index 45594f003ef7b47908644431f7888fab370db4e4..452d08545d31a219c8e453b7beaaf92fc943eacf 100644 --- a/drivers/net/wireless/cisco/airo.c +++ b/drivers/net/wireless/cisco/airo.c @@ -4672,7 +4672,7 @@ static ssize_t proc_write(struct file *file, static int proc_status_open(struct inode *inode, struct file *file) { struct proc_data *data; - struct net_device *dev = PDE_DATA(inode); + struct net_device *dev = pde_data(inode); struct airo_info *apriv = dev->ml_priv; CapabilityRid cap_rid; StatusRid status_rid; @@ -4756,7 +4756,7 @@ static int proc_stats_rid_open(struct inode *inode, u16 rid) { struct proc_data *data; - struct net_device *dev = PDE_DATA(inode); + struct net_device *dev = pde_data(inode); struct airo_info *apriv = dev->ml_priv; StatsRid stats; int i, j; @@ -4819,7 +4819,7 @@ static inline int sniffing_mode(struct airo_info *ai) static void proc_config_on_close(struct inode *inode, struct file *file) { struct proc_data *data = file->private_data; - struct net_device *dev = PDE_DATA(inode); + struct net_device *dev = pde_data(inode); struct airo_info *ai = dev->ml_priv; char *line; @@ -5030,7 +5030,7 @@ static const char *get_rmode(__le16 mode) static int proc_config_open(struct inode *inode, struct file *file) { struct proc_data *data; - struct net_device *dev = PDE_DATA(inode); + struct net_device *dev = pde_data(inode); struct airo_info *ai = dev->ml_priv; int i; __le16 mode; @@ -5120,7 +5120,7 @@ static int proc_config_open(struct inode *inode, struct file *file) static void proc_SSID_on_close(struct inode *inode, struct file *file) { struct proc_data *data = file->private_data; - struct net_device *dev = PDE_DATA(inode); + struct net_device *dev = pde_data(inode); struct airo_info *ai = dev->ml_priv; SsidRid SSID_rid; int i; @@ -5156,7 +5156,7 @@ static void proc_SSID_on_close(struct inode *inode, struct file *file) static void proc_APList_on_close(struct inode *inode, struct file *file) { struct proc_data *data = file->private_data; - struct net_device *dev = PDE_DATA(inode); + struct net_device *dev = pde_data(inode); struct airo_info *ai = dev->ml_priv; APListRid *APList_rid = &ai->APList; int i; @@ -5280,7 +5280,7 @@ static int set_wep_tx_idx(struct airo_info *ai, u16 index, int perm, int lock) static void proc_wepkey_on_close(struct inode *inode, struct file *file) { struct proc_data *data; - struct net_device *dev = PDE_DATA(inode); + struct net_device *dev = pde_data(inode); struct airo_info *ai = dev->ml_priv; int i, rc; char key[16]; @@ -5331,7 +5331,7 @@ static void proc_wepkey_on_close(struct inode *inode, struct file *file) static int proc_wepkey_open(struct inode *inode, struct file *file) { struct proc_data *data; - struct net_device *dev = PDE_DATA(inode); + struct net_device *dev = pde_data(inode); struct airo_info *ai = dev->ml_priv; char *ptr; WepKeyRid wkr; @@ -5379,7 +5379,7 @@ static int proc_wepkey_open(struct inode *inode, struct file *file) static int proc_SSID_open(struct inode *inode, struct file *file) { struct proc_data *data; - struct net_device *dev = PDE_DATA(inode); + struct net_device *dev = pde_data(inode); struct airo_info *ai = dev->ml_priv; int i; char *ptr; @@ -5423,7 +5423,7 @@ static int proc_SSID_open(struct inode *inode, struct file *file) static int proc_APList_open(struct inode *inode, struct file *file) { struct proc_data *data; - struct net_device *dev = PDE_DATA(inode); + struct net_device *dev = pde_data(inode); struct airo_info *ai = 
dev->ml_priv; int i; char *ptr; @@ -5462,7 +5462,7 @@ static int proc_APList_open(struct inode *inode, struct file *file) static int proc_BSSList_open(struct inode *inode, struct file *file) { struct proc_data *data; - struct net_device *dev = PDE_DATA(inode); + struct net_device *dev = pde_data(inode); struct airo_info *ai = dev->ml_priv; char *ptr; BSSListRid BSSList_rid; diff --git a/drivers/net/wireless/intersil/hostap/hostap_ap.c b/drivers/net/wireless/intersil/hostap/hostap_ap.c index 8bcc1cdcb75b02492ceaaf3313a6c1e432e17be9..462ccc7d7d1ac9c97972a08a8be9f672200326dc 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_ap.c +++ b/drivers/net/wireless/intersil/hostap/hostap_ap.c @@ -69,7 +69,7 @@ static void prism2_send_mgmt(struct net_device *dev, #if !defined(PRISM2_NO_PROCFS_DEBUG) && defined(CONFIG_PROC_FS) static int ap_debug_proc_show(struct seq_file *m, void *v) { - struct ap_data *ap = PDE_DATA(file_inode(m->file)); + struct ap_data *ap = pde_data(file_inode(m->file)); seq_printf(m, "BridgedUnicastFrames=%u\n", ap->bridged_unicast); seq_printf(m, "BridgedMulticastFrames=%u\n", ap->bridged_multicast); @@ -320,7 +320,7 @@ void hostap_deauth_all_stas(struct net_device *dev, struct ap_data *ap, static int ap_control_proc_show(struct seq_file *m, void *v) { - struct ap_data *ap = PDE_DATA(file_inode(m->file)); + struct ap_data *ap = pde_data(file_inode(m->file)); char *policy_txt; struct mac_entry *entry; @@ -352,20 +352,20 @@ static int ap_control_proc_show(struct seq_file *m, void *v) static void *ap_control_proc_start(struct seq_file *m, loff_t *_pos) { - struct ap_data *ap = PDE_DATA(file_inode(m->file)); + struct ap_data *ap = pde_data(file_inode(m->file)); spin_lock_bh(&ap->mac_restrictions.lock); return seq_list_start_head(&ap->mac_restrictions.mac_list, *_pos); } static void *ap_control_proc_next(struct seq_file *m, void *v, loff_t *_pos) { - struct ap_data *ap = PDE_DATA(file_inode(m->file)); + struct ap_data *ap = pde_data(file_inode(m->file)); return seq_list_next(v, &ap->mac_restrictions.mac_list, _pos); } static void ap_control_proc_stop(struct seq_file *m, void *v) { - struct ap_data *ap = PDE_DATA(file_inode(m->file)); + struct ap_data *ap = pde_data(file_inode(m->file)); spin_unlock_bh(&ap->mac_restrictions.lock); } @@ -554,20 +554,20 @@ static int prism2_ap_proc_show(struct seq_file *m, void *v) static void *prism2_ap_proc_start(struct seq_file *m, loff_t *_pos) { - struct ap_data *ap = PDE_DATA(file_inode(m->file)); + struct ap_data *ap = pde_data(file_inode(m->file)); spin_lock_bh(&ap->sta_table_lock); return seq_list_start_head(&ap->sta_list, *_pos); } static void *prism2_ap_proc_next(struct seq_file *m, void *v, loff_t *_pos) { - struct ap_data *ap = PDE_DATA(file_inode(m->file)); + struct ap_data *ap = pde_data(file_inode(m->file)); return seq_list_next(v, &ap->sta_list, _pos); } static void prism2_ap_proc_stop(struct seq_file *m, void *v) { - struct ap_data *ap = PDE_DATA(file_inode(m->file)); + struct ap_data *ap = pde_data(file_inode(m->file)); spin_unlock_bh(&ap->sta_table_lock); } diff --git a/drivers/net/wireless/intersil/hostap/hostap_download.c b/drivers/net/wireless/intersil/hostap/hostap_download.c index 7c6a5a6d1d45d8057bd008ed6dda043921ab490e..3672291ced5c843c808d910beb2159afeff51ceb 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_download.c +++ b/drivers/net/wireless/intersil/hostap/hostap_download.c @@ -227,7 +227,7 @@ static int prism2_download_aux_dump_proc_open(struct inode *inode, struct file * sizeof(struct 
prism2_download_aux_dump)); if (ret == 0) { struct seq_file *m = file->private_data; - m->private = PDE_DATA(inode); + m->private = pde_data(inode); } return ret; } diff --git a/drivers/net/wireless/intersil/hostap/hostap_proc.c b/drivers/net/wireless/intersil/hostap/hostap_proc.c index 51c847d98755d0c17dde822daea3128dc8adeb94..61f68786056f40c4188f9a0c1e49ef7630e28bca 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_proc.c +++ b/drivers/net/wireless/intersil/hostap/hostap_proc.c @@ -97,20 +97,20 @@ static int prism2_wds_proc_show(struct seq_file *m, void *v) static void *prism2_wds_proc_start(struct seq_file *m, loff_t *_pos) { - local_info_t *local = PDE_DATA(file_inode(m->file)); + local_info_t *local = pde_data(file_inode(m->file)); read_lock_bh(&local->iface_lock); return seq_list_start(&local->hostap_interfaces, *_pos); } static void *prism2_wds_proc_next(struct seq_file *m, void *v, loff_t *_pos) { - local_info_t *local = PDE_DATA(file_inode(m->file)); + local_info_t *local = pde_data(file_inode(m->file)); return seq_list_next(v, &local->hostap_interfaces, _pos); } static void prism2_wds_proc_stop(struct seq_file *m, void *v) { - local_info_t *local = PDE_DATA(file_inode(m->file)); + local_info_t *local = pde_data(file_inode(m->file)); read_unlock_bh(&local->iface_lock); } @@ -123,7 +123,7 @@ static const struct seq_operations prism2_wds_proc_seqops = { static int prism2_bss_list_proc_show(struct seq_file *m, void *v) { - local_info_t *local = PDE_DATA(file_inode(m->file)); + local_info_t *local = pde_data(file_inode(m->file)); struct list_head *ptr = v; struct hostap_bss_info *bss; @@ -151,21 +151,21 @@ static int prism2_bss_list_proc_show(struct seq_file *m, void *v) static void *prism2_bss_list_proc_start(struct seq_file *m, loff_t *_pos) __acquires(&local->lock) { - local_info_t *local = PDE_DATA(file_inode(m->file)); + local_info_t *local = pde_data(file_inode(m->file)); spin_lock_bh(&local->lock); return seq_list_start_head(&local->bss_list, *_pos); } static void *prism2_bss_list_proc_next(struct seq_file *m, void *v, loff_t *_pos) { - local_info_t *local = PDE_DATA(file_inode(m->file)); + local_info_t *local = pde_data(file_inode(m->file)); return seq_list_next(v, &local->bss_list, _pos); } static void prism2_bss_list_proc_stop(struct seq_file *m, void *v) __releases(&local->lock) { - local_info_t *local = PDE_DATA(file_inode(m->file)); + local_info_t *local = pde_data(file_inode(m->file)); spin_unlock_bh(&local->lock); } @@ -198,7 +198,7 @@ static int prism2_crypt_proc_show(struct seq_file *m, void *v) static ssize_t prism2_pda_proc_read(struct file *file, char __user *buf, size_t count, loff_t *_pos) { - local_info_t *local = PDE_DATA(file_inode(file)); + local_info_t *local = pde_data(file_inode(file)); size_t off; if (local->pda == NULL || *_pos >= PRISM2_PDA_SIZE) @@ -272,7 +272,7 @@ static int prism2_io_debug_proc_read(char *page, char **start, off_t off, #ifndef PRISM2_NO_STATION_MODES static int prism2_scan_results_proc_show(struct seq_file *m, void *v) { - local_info_t *local = PDE_DATA(file_inode(m->file)); + local_info_t *local = pde_data(file_inode(m->file)); unsigned long entry; int i, len; struct hfa384x_hostscan_result *scanres; @@ -322,7 +322,7 @@ static int prism2_scan_results_proc_show(struct seq_file *m, void *v) static void *prism2_scan_results_proc_start(struct seq_file *m, loff_t *_pos) { - local_info_t *local = PDE_DATA(file_inode(m->file)); + local_info_t *local = pde_data(file_inode(m->file)); spin_lock_bh(&local->lock); /* We have a header 
(pos 0) + N results to show (pos 1...N) */ @@ -333,7 +333,7 @@ static void *prism2_scan_results_proc_start(struct seq_file *m, loff_t *_pos) static void *prism2_scan_results_proc_next(struct seq_file *m, void *v, loff_t *_pos) { - local_info_t *local = PDE_DATA(file_inode(m->file)); + local_info_t *local = pde_data(file_inode(m->file)); ++*_pos; if (*_pos > local->last_scan_results_count) @@ -343,7 +343,7 @@ static void *prism2_scan_results_proc_next(struct seq_file *m, void *v, loff_t * static void prism2_scan_results_proc_stop(struct seq_file *m, void *v) { - local_info_t *local = PDE_DATA(file_inode(m->file)); + local_info_t *local = pde_data(file_inode(m->file)); spin_unlock_bh(&local->lock); } diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index e3a3dc3e45b43a3da287d0e0d515e95c260e5309..2987ad9271f64706f2e126575218d5621cfbc5b3 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -2746,7 +2746,7 @@ static ssize_t int_proc_write(struct file *file, const char __user *buffer, nr = nr * 10 + c; p++; } while (--len); - *(int *)PDE_DATA(file_inode(file)) = nr; + *(int *)pde_data(file_inode(file)) = nr; return count; } diff --git a/drivers/nubus/proc.c b/drivers/nubus/proc.c index 88e1f9a0faafd6a35f9bbf8ee7648b7b0eaf9b44..1fd667852271fd5495729f3782c97808b92e6fe8 100644 --- a/drivers/nubus/proc.c +++ b/drivers/nubus/proc.c @@ -93,30 +93,30 @@ struct nubus_proc_pde_data { static struct nubus_proc_pde_data * nubus_proc_alloc_pde_data(unsigned char *ptr, unsigned int size) { - struct nubus_proc_pde_data *pde_data; + struct nubus_proc_pde_data *pded; - pde_data = kmalloc(sizeof(*pde_data), GFP_KERNEL); - if (!pde_data) + pded = kmalloc(sizeof(*pded), GFP_KERNEL); + if (!pded) return NULL; - pde_data->res_ptr = ptr; - pde_data->res_size = size; - return pde_data; + pded->res_ptr = ptr; + pded->res_size = size; + return pded; } static int nubus_proc_rsrc_show(struct seq_file *m, void *v) { struct inode *inode = m->private; - struct nubus_proc_pde_data *pde_data; + struct nubus_proc_pde_data *pded; - pde_data = PDE_DATA(inode); - if (!pde_data) + pded = pde_data(inode); + if (!pded) return 0; - if (pde_data->res_size > m->size) + if (pded->res_size > m->size) return -EFBIG; - if (pde_data->res_size) { + if (pded->res_size) { int lanes = (int)proc_get_parent_data(inode); struct nubus_dirent ent; @@ -124,11 +124,11 @@ static int nubus_proc_rsrc_show(struct seq_file *m, void *v) return 0; ent.mask = lanes; - ent.base = pde_data->res_ptr; + ent.base = pded->res_ptr; ent.data = 0; - nubus_seq_write_rsrc_mem(m, &ent, pde_data->res_size); + nubus_seq_write_rsrc_mem(m, &ent, pded->res_size); } else { - unsigned int data = (unsigned int)pde_data->res_ptr; + unsigned int data = (unsigned int)pded->res_ptr; seq_putc(m, data >> 16); seq_putc(m, data >> 8); @@ -142,18 +142,18 @@ void nubus_proc_add_rsrc_mem(struct proc_dir_entry *procdir, unsigned int size) { char name[9]; - struct nubus_proc_pde_data *pde_data; + struct nubus_proc_pde_data *pded; if (!procdir) return; snprintf(name, sizeof(name), "%x", ent->type); if (size) - pde_data = nubus_proc_alloc_pde_data(nubus_dirptr(ent), size); + pded = nubus_proc_alloc_pde_data(nubus_dirptr(ent), size); else - pde_data = NULL; + pded = NULL; proc_create_single_data(name, S_IFREG | 0444, procdir, - nubus_proc_rsrc_show, pde_data); + nubus_proc_rsrc_show, pded); } void nubus_proc_add_rsrc(struct proc_dir_entry *procdir, diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 
5e0bfda04bd7b197706db239ee395e0c02ff9729..79005ea1a33e380b83ecd2e599619032e44f2f33 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -368,6 +368,7 @@ EXPORT_SYMBOL_GPL(nvme_complete_rq); void nvme_complete_batch_req(struct request *req) { + trace_nvme_complete_rq(req); nvme_cleanup_cmd(req); nvme_end_req_zoned(req); } @@ -4253,7 +4254,14 @@ static void nvme_async_event_work(struct work_struct *work) container_of(work, struct nvme_ctrl, async_event_work); nvme_aen_uevent(ctrl); - ctrl->ops->submit_async_event(ctrl); + + /* + * The transport drivers must guarantee AER submission here is safe by + * flushing ctrl async_event_work after changing the controller state + * from LIVE and before freeing the admin queue. + */ + if (ctrl->state == NVME_CTRL_LIVE) + ctrl->ops->submit_async_event(ctrl); } static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 7ae041e2b3fb0508f0016e6473e8a5e040902139..f79a66d4e22cec200b3a35d12c9c47efcbb07425 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -1092,7 +1092,6 @@ static void __nvmf_concat_opt_tokens(struct seq_file *seq_file) static int nvmf_dev_show(struct seq_file *seq_file, void *private) { struct nvme_ctrl *ctrl; - int ret = 0; mutex_lock(&nvmf_dev_mutex); ctrl = seq_file->private; @@ -1106,7 +1105,7 @@ static int nvmf_dev_show(struct seq_file *seq_file, void *private) out_unlock: mutex_unlock(&nvmf_dev_mutex); - return ret; + return 0; } static int nvmf_dev_open(struct inode *inode, struct file *file) diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index c3203ff1c654c546e74a235f87f8e5ff8f1035ee..1e3a09cad96113971b78c8ecda5b4a5d7c4ffb8a 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -170,6 +170,7 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl, struct nvmf_ctrl_options *opts) { if (ctrl->state == NVME_CTRL_DELETING || + ctrl->state == NVME_CTRL_DELETING_NOIO || ctrl->state == NVME_CTRL_DEAD || strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) || strcmp(opts->host->nqn, ctrl->opts->host->nqn) || diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d8585df2c2fd796acd1fae81ae357053851231ee..6a99ed68091589ad1d576d9a7879234b7b5986bb 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3391,7 +3391,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x0a54), /* Intel P4500/P4600 */ .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DEALLOCATE_ZEROES, }, + NVME_QUIRK_DEALLOCATE_ZEROES | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x0a55), /* Dell Express Flash P4600 */ .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES, }, diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 850f84d204d053632241bac939726584f965e290..9c55e4be8a3986b5d9e4115fffa1f42cbfa7e962 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1200,6 +1200,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) struct nvme_rdma_ctrl, err_work); nvme_stop_keep_alive(&ctrl->ctrl); + flush_work(&ctrl->ctrl.async_event_work); nvme_rdma_teardown_io_queues(ctrl, false); nvme_start_queues(&ctrl->ctrl); nvme_rdma_teardown_admin_queue(ctrl, false); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 4ceb28675fdf63b4b1dba86b0e790b925bf6b57b..891a36d02e7c7191471b66285d2c58da4ac53dd7 100644 --- a/drivers/nvme/host/tcp.c +++ 
b/drivers/nvme/host/tcp.c @@ -913,7 +913,15 @@ static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue) static void nvme_tcp_fail_request(struct nvme_tcp_request *req) { - nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_HOST_PATH_ERROR); + if (nvme_tcp_async_req(req)) { + union nvme_result res = {}; + + nvme_complete_async_event(&req->queue->ctrl->ctrl, + cpu_to_le16(NVME_SC_HOST_PATH_ERROR), &res); + } else { + nvme_tcp_end_request(blk_mq_rq_from_pdu(req), + NVME_SC_HOST_PATH_ERROR); + } } static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) @@ -2096,6 +2104,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; nvme_stop_keep_alive(ctrl); + flush_work(&ctrl->async_event_work); nvme_tcp_teardown_io_queues(ctrl, false); /* unquiesce to fail fast pending requests */ nvme_start_queues(ctrl); diff --git a/drivers/of/base.c b/drivers/of/base.c index 8a24d37153b45de3749784fd1ff2003a0bc98f32..e7d92b67cb8a027e40187fe0417fdcca242f6b2d 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1420,15 +1420,18 @@ int of_phandle_iterator_args(struct of_phandle_iterator *it, return count; } -static int __of_parse_phandle_with_args(const struct device_node *np, - const char *list_name, - const char *cells_name, - int cell_count, int index, - struct of_phandle_args *out_args) +int __of_parse_phandle_with_args(const struct device_node *np, + const char *list_name, + const char *cells_name, + int cell_count, int index, + struct of_phandle_args *out_args) { struct of_phandle_iterator it; int rc, cur_index = 0; + if (index < 0) + return -EINVAL; + /* Loop over the phandles until all the requested entry is found */ of_for_each_phandle(&it, rc, np, list_name, cells_name, cell_count) { /* @@ -1471,82 +1474,7 @@ static int __of_parse_phandle_with_args(const struct device_node *np, of_node_put(it.node); return rc; } - -/** - * of_parse_phandle - Resolve a phandle property to a device_node pointer - * @np: Pointer to device node holding phandle property - * @phandle_name: Name of property holding a phandle value - * @index: For properties holding a table of phandles, this is the index into - * the table - * - * Return: The device_node pointer with refcount incremented. Use - * of_node_put() on it when done. - */ -struct device_node *of_parse_phandle(const struct device_node *np, - const char *phandle_name, int index) -{ - struct of_phandle_args args; - - if (index < 0) - return NULL; - - if (__of_parse_phandle_with_args(np, phandle_name, NULL, 0, - index, &args)) - return NULL; - - return args.np; -} -EXPORT_SYMBOL(of_parse_phandle); - -/** - * of_parse_phandle_with_args() - Find a node pointed by phandle in a list - * @np: pointer to a device tree node containing a list - * @list_name: property name that contains a list - * @cells_name: property name that specifies phandles' arguments count - * @index: index of a phandle to parse out - * @out_args: optional pointer to output arguments structure (will be filled) - * - * This function is useful to parse lists of phandles and their arguments. - * Returns 0 on success and fills out_args, on error returns appropriate - * errno value. - * - * Caller is responsible to call of_node_put() on the returned out_args->np - * pointer. 
- * - * Example:: - * - * phandle1: node1 { - * #list-cells = <2>; - * }; - * - * phandle2: node2 { - * #list-cells = <1>; - * }; - * - * node3 { - * list = <&phandle1 1 2 &phandle2 3>; - * }; - * - * To get a device_node of the ``node2`` node you may call this: - * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args); - */ -int of_parse_phandle_with_args(const struct device_node *np, const char *list_name, - const char *cells_name, int index, - struct of_phandle_args *out_args) -{ - int cell_count = -1; - - if (index < 0) - return -EINVAL; - - /* If cells_name is NULL we assume a cell count of 0 */ - if (!cells_name) - cell_count = 0; - - return __of_parse_phandle_with_args(np, list_name, cells_name, - cell_count, index, out_args); -} -EXPORT_SYMBOL(of_parse_phandle_with_args); +EXPORT_SYMBOL(__of_parse_phandle_with_args); /** * of_parse_phandle_with_args_map() - Find a node pointed by phandle in a list and remap it @@ -1732,47 +1660,6 @@ free: } EXPORT_SYMBOL(of_parse_phandle_with_args_map); -/** - * of_parse_phandle_with_fixed_args() - Find a node pointed by phandle in a list - * @np: pointer to a device tree node containing a list - * @list_name: property name that contains a list - * @cell_count: number of argument cells following the phandle - * @index: index of a phandle to parse out - * @out_args: optional pointer to output arguments structure (will be filled) - * - * This function is useful to parse lists of phandles and their arguments. - * Returns 0 on success and fills out_args, on error returns appropriate - * errno value. - * - * Caller is responsible to call of_node_put() on the returned out_args->np - * pointer. - * - * Example:: - * - * phandle1: node1 { - * }; - * - * phandle2: node2 { - * }; - * - * node3 { - * list = <&phandle1 0 2 &phandle2 2 3>; - * }; - * - * To get a device_node of the ``node2`` node you may call this: - * of_parse_phandle_with_fixed_args(node3, "list", 2, 1, &args); - */ -int of_parse_phandle_with_fixed_args(const struct device_node *np, - const char *list_name, int cell_count, - int index, struct of_phandle_args *out_args) -{ - if (index < 0) - return -EINVAL; - return __of_parse_phandle_with_args(np, list_name, NULL, cell_count, - index, out_args); -} -EXPORT_SYMBOL(of_parse_phandle_with_fixed_args); - /** * of_count_phandle_with_args() - Find the number of phandles references in a property * @np: pointer to a device tree node containing a list diff --git a/drivers/of/device.c b/drivers/of/device.c index b0800c260f64a219848888eee8762fafee0aef64..874f031442dc7bf1a631315591f7917deb3222f0 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -28,7 +28,7 @@ const struct of_device_id *of_match_device(const struct of_device_id *matches, const struct device *dev) { - if ((!matches) || (!dev->of_node)) + if (!matches || !dev->of_node || dev->of_node_reused) return NULL; return of_match_node(matches, dev->of_node); } diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 059566f5442919b37a9cf9a13f7e6a7e534f155f..9be007c9420f9e87a675a611b62f4f4f62faa661 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1003,7 +1003,7 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, ioc->usg_calls++; #endif - while(sg_dma_len(sglist) && nents--) { + while (nents && sg_dma_len(sglist)) { #ifdef CCIO_COLLECT_STATS ioc->usg_pages += sg_dma_len(sglist) >> PAGE_SHIFT; @@ -1011,6 +1011,7 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, ccio_unmap_page(dev, 
sg_dma_address(sglist), sg_dma_len(sglist), direction, 0); ++sglist; + nents--; } DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents); diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c index cf91cb024be304307485a567177826ded6022fd3..1e4a5663d01122bffa88f0ecae49fdf3d5a6f3de 100644 --- a/drivers/parisc/led.c +++ b/drivers/parisc/led.c @@ -168,14 +168,14 @@ static int led_proc_show(struct seq_file *m, void *v) static int led_proc_open(struct inode *inode, struct file *file) { - return single_open(file, led_proc_show, PDE_DATA(inode)); + return single_open(file, led_proc_show, pde_data(inode)); } static ssize_t led_proc_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { - void *data = PDE_DATA(file_inode(file)); + void *data = pde_data(file_inode(file)); char *cur, lbuf[32]; int d; diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index 9513c39719d142fb96c74736b5af5296a49bf1d3..d9e51036a4facef004757d2951cc7e7ee23cc214 100644 --- a/drivers/parisc/pdc_stable.c +++ b/drivers/parisc/pdc_stable.c @@ -980,8 +980,10 @@ pdcs_register_pathentries(void) entry->kobj.kset = paths_kset; err = kobject_init_and_add(&entry->kobj, &ktype_pdcspath, NULL, "%s", entry->name); - if (err) + if (err) { + kobject_put(&entry->kobj); return err; + } /* kobject is now registered */ write_lock(&entry->rw_lock); diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index e60690d38d677addfa91d0bf4575e88176691390..374b9199878d4121da8d78849c62707b8635e151 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -1047,7 +1047,7 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, spin_unlock_irqrestore(&ioc->res_lock, flags); #endif - while (sg_dma_len(sglist) && nents--) { + while (nents && sg_dma_len(sglist)) { sba_unmap_page(dev, sg_dma_address(sglist), sg_dma_len(sglist), direction, 0); @@ -1056,6 +1056,7 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, ioc->usingle_calls--; /* kluge since call is unmap_sg() */ #endif ++sglist; + nents--; } DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents); diff --git a/drivers/pci/controller/cadence/pci-j721e.c b/drivers/pci/controller/cadence/pci-j721e.c index 489586a4cdc7b8443eda32f63f84819ebaa89e74..768d33f9ebc87569d4e247bba2588a958e529b40 100644 --- a/drivers/pci/controller/cadence/pci-j721e.c +++ b/drivers/pci/controller/cadence/pci-j721e.c @@ -356,8 +356,8 @@ static int j721e_pcie_probe(struct platform_device *pdev) const struct j721e_pcie_data *data; struct cdns_pcie *cdns_pcie; struct j721e_pcie *pcie; - struct cdns_pcie_rc *rc; - struct cdns_pcie_ep *ep; + struct cdns_pcie_rc *rc = NULL; + struct cdns_pcie_ep *ep = NULL; struct gpio_desc *gpiod; void __iomem *base; struct clk *clk; @@ -376,6 +376,46 @@ static int j721e_pcie_probe(struct platform_device *pdev) if (!pcie) return -ENOMEM; + switch (mode) { + case PCI_MODE_RC: + if (!IS_ENABLED(CONFIG_PCIE_CADENCE_HOST)) + return -ENODEV; + + bridge = devm_pci_alloc_host_bridge(dev, sizeof(*rc)); + if (!bridge) + return -ENOMEM; + + if (!data->byte_access_allowed) + bridge->ops = &cdns_ti_pcie_host_ops; + rc = pci_host_bridge_priv(bridge); + rc->quirk_retrain_flag = data->quirk_retrain_flag; + rc->quirk_detect_quiet_flag = data->quirk_detect_quiet_flag; + + cdns_pcie = &rc->pcie; + cdns_pcie->dev = dev; + cdns_pcie->ops = &j721e_pcie_ops; + pcie->cdns_pcie = cdns_pcie; + break; + case PCI_MODE_EP: + if (!IS_ENABLED(CONFIG_PCIE_CADENCE_EP)) + return -ENODEV; + + ep = devm_kzalloc(dev, 
sizeof(*ep), GFP_KERNEL); + if (!ep) + return -ENOMEM; + + ep->quirk_detect_quiet_flag = data->quirk_detect_quiet_flag; + + cdns_pcie = &ep->pcie; + cdns_pcie->dev = dev; + cdns_pcie->ops = &j721e_pcie_ops; + pcie->cdns_pcie = cdns_pcie; + break; + default: + dev_err(dev, "INVALID device type %d\n", mode); + return 0; + } + pcie->mode = mode; pcie->linkdown_irq_regfield = data->linkdown_irq_regfield; @@ -426,28 +466,6 @@ static int j721e_pcie_probe(struct platform_device *pdev) switch (mode) { case PCI_MODE_RC: - if (!IS_ENABLED(CONFIG_PCIE_CADENCE_HOST)) { - ret = -ENODEV; - goto err_get_sync; - } - - bridge = devm_pci_alloc_host_bridge(dev, sizeof(*rc)); - if (!bridge) { - ret = -ENOMEM; - goto err_get_sync; - } - - if (!data->byte_access_allowed) - bridge->ops = &cdns_ti_pcie_host_ops; - rc = pci_host_bridge_priv(bridge); - rc->quirk_retrain_flag = data->quirk_retrain_flag; - rc->quirk_detect_quiet_flag = data->quirk_detect_quiet_flag; - - cdns_pcie = &rc->pcie; - cdns_pcie->dev = dev; - cdns_pcie->ops = &j721e_pcie_ops; - pcie->cdns_pcie = cdns_pcie; - gpiod = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); if (IS_ERR(gpiod)) { ret = PTR_ERR(gpiod); @@ -497,23 +515,6 @@ static int j721e_pcie_probe(struct platform_device *pdev) break; case PCI_MODE_EP: - if (!IS_ENABLED(CONFIG_PCIE_CADENCE_EP)) { - ret = -ENODEV; - goto err_get_sync; - } - - ep = devm_kzalloc(dev, sizeof(*ep), GFP_KERNEL); - if (!ep) { - ret = -ENOMEM; - goto err_get_sync; - } - ep->quirk_detect_quiet_flag = data->quirk_detect_quiet_flag; - - cdns_pcie = &ep->pcie; - cdns_pcie->dev = dev; - cdns_pcie->ops = &j721e_pcie_ops; - pcie->cdns_pcie = cdns_pcie; - ret = cdns_pcie_init_phy(dev, cdns_pcie); if (ret) { dev_err(dev, "Failed to init phy\n"); @@ -525,8 +526,6 @@ static int j721e_pcie_probe(struct platform_device *pdev) goto err_pcie_setup; break; - default: - dev_err(dev, "INVALID device type %d\n", mode); } return 0; diff --git a/drivers/pci/controller/dwc/pci-dra7xx.c b/drivers/pci/controller/dwc/pci-dra7xx.c index 12d19183e746f662bf8180e8090944192420d79e..dfcdeb432dc8de12f5cfcbd267ee4feedcc86abd 100644 --- a/drivers/pci/controller/dwc/pci-dra7xx.c +++ b/drivers/pci/controller/dwc/pci-dra7xx.c @@ -213,7 +213,7 @@ static int dra7xx_pcie_handle_msi(struct pcie_port *pp, int index) if (!val) return 0; - pos = find_next_bit(&val, MAX_MSI_IRQS_PER_CTRL, 0); + pos = find_first_bit(&val, MAX_MSI_IRQS_PER_CTRL); while (pos != MAX_MSI_IRQS_PER_CTRL) { generic_handle_domain_irq(pp->irq_domain, (index * MAX_MSI_IRQS_PER_CTRL) + pos); diff --git a/drivers/pci/controller/dwc/pcie-kirin.c b/drivers/pci/controller/dwc/pcie-kirin.c index fa6886d66488a0208bba5434308cf2acc7037bbc..c625fc6bb28711b8c635274ab002a6f4901b80cb 100644 --- a/drivers/pci/controller/dwc/pcie-kirin.c +++ b/drivers/pci/controller/dwc/pcie-kirin.c @@ -756,22 +756,28 @@ static int __exit kirin_pcie_remove(struct platform_device *pdev) return 0; } +struct kirin_pcie_data { + enum pcie_kirin_phy_type phy_type; +}; + +static const struct kirin_pcie_data kirin_960_data = { + .phy_type = PCIE_KIRIN_INTERNAL_PHY, +}; + +static const struct kirin_pcie_data kirin_970_data = { + .phy_type = PCIE_KIRIN_EXTERNAL_PHY, +}; + static const struct of_device_id kirin_pcie_match[] = { - { - .compatible = "hisilicon,kirin960-pcie", - .data = (void *)PCIE_KIRIN_INTERNAL_PHY - }, - { - .compatible = "hisilicon,kirin970-pcie", - .data = (void *)PCIE_KIRIN_EXTERNAL_PHY - }, + { .compatible = "hisilicon,kirin960-pcie", .data = &kirin_960_data }, + { .compatible = 
"hisilicon,kirin970-pcie", .data = &kirin_970_data }, {}, }; static int kirin_pcie_probe(struct platform_device *pdev) { - enum pcie_kirin_phy_type phy_type; struct device *dev = &pdev->dev; + const struct kirin_pcie_data *data; struct kirin_pcie *kirin_pcie; struct dw_pcie *pci; int ret; @@ -781,13 +787,12 @@ static int kirin_pcie_probe(struct platform_device *pdev) return -EINVAL; } - phy_type = (long)of_device_get_match_data(dev); - if (!phy_type) { + data = of_device_get_match_data(dev); + if (!data) { dev_err(dev, "OF data missing\n"); return -EINVAL; } - kirin_pcie = devm_kzalloc(dev, sizeof(struct kirin_pcie), GFP_KERNEL); if (!kirin_pcie) return -ENOMEM; @@ -800,7 +805,7 @@ static int kirin_pcie_probe(struct platform_device *pdev) pci->ops = &kirin_dw_pcie_ops; pci->pp.ops = &kirin_pcie_host_ops; kirin_pcie->pci = pci; - kirin_pcie->type = phy_type; + kirin_pcie->type = data->phy_type; ret = kirin_pcie_get_resource(kirin_pcie, pdev); if (ret) diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index 20ea2ee330b8776a2c22e8a113b3bec0404427d1..ae0bc2fee4ca8b8b83c83d4567e7879df0159115 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -2155,8 +2155,17 @@ static void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus) if (!hv_dev) continue; - if (hv_dev->desc.flags & HV_PCI_DEVICE_FLAG_NUMA_AFFINITY) - set_dev_node(&dev->dev, hv_dev->desc.virtual_numa_node); + if (hv_dev->desc.flags & HV_PCI_DEVICE_FLAG_NUMA_AFFINITY && + hv_dev->desc.virtual_numa_node < num_possible_nodes()) + /* + * The kernel may boot with some NUMA nodes offline + * (e.g. in a KDUMP kernel) or with NUMA disabled via + * "numa=off". In those cases, adjust the host provided + * NUMA node to a valid NUMA node used by the kernel. 
+ */ + set_dev_node(&dev->dev, + numa_map_to_online_node( + hv_dev->desc.virtual_numa_node)); put_pcichild(hv_dev); } diff --git a/drivers/pci/controller/pcie-mt7621.c b/drivers/pci/controller/pcie-mt7621.c index 3824862ea144f41de6c2ada825976dd481cc0a3c..33eb37a2225c14c7e9be5f53c45b9ca7cb96bef1 100644 --- a/drivers/pci/controller/pcie-mt7621.c +++ b/drivers/pci/controller/pcie-mt7621.c @@ -109,15 +109,6 @@ static inline void pcie_write(struct mt7621_pcie *pcie, u32 val, u32 reg) writel_relaxed(val, pcie->base + reg); } -static inline void pcie_rmw(struct mt7621_pcie *pcie, u32 reg, u32 clr, u32 set) -{ - u32 val = readl_relaxed(pcie->base + reg); - - val &= ~clr; - val |= set; - writel_relaxed(val, pcie->base + reg); -} - static inline u32 pcie_port_read(struct mt7621_pcie_port *port, u32 reg) { return readl_relaxed(port->base + reg); @@ -557,7 +548,7 @@ static struct platform_driver mt7621_pcie_driver = { .remove = mt7621_pcie_remove, .driver = { .name = "mt7621-pci", - .of_match_table = of_match_ptr(mt7621_pcie_ids), + .of_match_table = mt7621_pcie_ids, }, }; builtin_platform_driver(mt7621_pcie_driver); diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index 0d63541c4052aa600e7e26620ef8152c2f0870e2..e9cf318e6670f7cab5bfa93892295bf0b6d9f4e5 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -28,6 +28,7 @@ void pci_msi_teardown_msi_irqs(struct pci_dev *dev) msi_domain_free_irqs_descs_locked(domain, &dev->dev); else pci_msi_legacy_teardown_msi_irqs(dev); + msi_free_msi_descs(&dev->dev); } /** @@ -171,8 +172,7 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) pci_msi_domain_update_chip_ops(info); - info->flags |= MSI_FLAG_ACTIVATE_EARLY | MSI_FLAG_DEV_SYSFS | - MSI_FLAG_FREE_MSI_DESCS; + info->flags |= MSI_FLAG_ACTIVATE_EARLY | MSI_FLAG_DEV_SYSFS; if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE)) info->flags |= MSI_FLAG_MUST_REACTIVATE; diff --git a/drivers/pci/msi/legacy.c b/drivers/pci/msi/legacy.c index cdbb4689db78bd1880f93277a8e2a68c55826247..db761adef652b63f256987ac2f2fcf8925c35a2c 100644 --- a/drivers/pci/msi/legacy.c +++ b/drivers/pci/msi/legacy.c @@ -77,5 +77,4 @@ void pci_msi_legacy_teardown_msi_irqs(struct pci_dev *dev) { msi_device_destroy_sysfs(&dev->dev); arch_teardown_msi_irqs(dev); - msi_free_msi_descs(&dev->dev); } diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index c19c7ca581868a366289932163621ad8ae794c34..9037a7827eca75918a6376bc0ab38e3905fa7fe2 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -1111,7 +1111,8 @@ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr) if (!desc) return cpu_possible_mask; - if (WARN_ON_ONCE(!desc->affinity)) + /* MSI[X] interrupts can be allocated without affinity descriptor */ + if (!desc->affinity) return NULL; /* diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index bda630889f95550b111f2cbd2299f0be11970eee..604feeb84ee402ede18311d45c477768f76f87b1 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -166,6 +166,9 @@ static int pcie_init_service_irqs(struct pci_dev *dev, int *irqs, int mask) { int ret, i; + for (i = 0; i < PCIE_PORT_DEVICE_MAXSERVICES; i++) + irqs[i] = -1; + /* * If we support PME but can't use MSI/MSI-X for it, we have to * fall back to INTx or other interrupts, e.g., a system shared @@ -314,10 +317,8 @@ static int pcie_device_init(struct pci_dev *pdev, int service, int irq) */ int 
pcie_port_device_register(struct pci_dev *dev) { - int status, capabilities, irq_services, i, nr_service; - int irqs[PCIE_PORT_DEVICE_MAXSERVICES] = { - [0 ... PCIE_PORT_DEVICE_MAXSERVICES-1] = -1 - }; + int status, capabilities, i, nr_service; + int irqs[PCIE_PORT_DEVICE_MAXSERVICES]; /* Enable PCI Express port device */ status = pci_enable_device(dev); @@ -330,32 +331,18 @@ int pcie_port_device_register(struct pci_dev *dev) return 0; pci_set_master(dev); - - irq_services = 0; - if (IS_ENABLED(CONFIG_PCIE_PME)) - irq_services |= PCIE_PORT_SERVICE_PME; - if (IS_ENABLED(CONFIG_PCIEAER)) - irq_services |= PCIE_PORT_SERVICE_AER; - if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) - irq_services |= PCIE_PORT_SERVICE_HP; - if (IS_ENABLED(CONFIG_PCIE_DPC)) - irq_services |= PCIE_PORT_SERVICE_DPC; - irq_services &= capabilities; - - if (irq_services) { - /* - * Initialize service IRQs. Don't use service devices that - * require interrupts if there is no way to generate them. - * However, some drivers may have a polling mode (e.g. - * pciehp_poll_mode) that can be used in the absence of IRQs. - * Allow them to determine if that is to be used. - */ - status = pcie_init_service_irqs(dev, irqs, irq_services); - if (status) { - irq_services &= PCIE_PORT_SERVICE_HP; - if (!irq_services) - goto error_disable; - } + /* + * Initialize service irqs. Don't use service devices that + * require interrupts if there is no way to generate them. + * However, some drivers may have a polling mode (e.g. pciehp_poll_mode) + * that can be used in the absence of irqs. Allow them to determine + * if that is to be used. + */ + status = pcie_init_service_irqs(dev, irqs, capabilities); + if (status) { + capabilities &= PCIE_PORT_SERVICE_HP; + if (!capabilities) + goto error_disable; } /* Allocate child services if any */ diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index cb18f8a13ab66fce5bfbc3f93b2feaba7535c61f..9c7edec64f7e24bed44b3cd8e6afd3b1b909a0d1 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -21,14 +21,14 @@ static int proc_initialized; /* = 0 */ static loff_t proc_bus_pci_lseek(struct file *file, loff_t off, int whence) { - struct pci_dev *dev = PDE_DATA(file_inode(file)); + struct pci_dev *dev = pde_data(file_inode(file)); return fixed_size_llseek(file, off, whence, dev->cfg_size); } static ssize_t proc_bus_pci_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - struct pci_dev *dev = PDE_DATA(file_inode(file)); + struct pci_dev *dev = pde_data(file_inode(file)); unsigned int pos = *ppos; unsigned int cnt, size; @@ -114,7 +114,7 @@ static ssize_t proc_bus_pci_write(struct file *file, const char __user *buf, size_t nbytes, loff_t *ppos) { struct inode *ino = file_inode(file); - struct pci_dev *dev = PDE_DATA(ino); + struct pci_dev *dev = pde_data(ino); int pos = *ppos; int size = dev->cfg_size; int cnt, ret; @@ -196,7 +196,7 @@ struct pci_filp_private { static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - struct pci_dev *dev = PDE_DATA(file_inode(file)); + struct pci_dev *dev = pde_data(file_inode(file)); #ifdef HAVE_PCI_MMAP struct pci_filp_private *fpriv = file->private_data; #endif /* HAVE_PCI_MMAP */ @@ -244,7 +244,7 @@ static long proc_bus_pci_ioctl(struct file *file, unsigned int cmd, #ifdef HAVE_PCI_MMAP static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma) { - struct pci_dev *dev = PDE_DATA(file_inode(file)); + struct pci_dev *dev = pde_data(file_inode(file)); struct pci_filp_private *fpriv = file->private_data; 
int i, ret, write_combine = 0, res_bit = IORESOURCE_MEM; diff --git a/drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c b/drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c index cd2332bf0e31ad8d562120343683541435b1bf88..fdbd64c03e12b4efdf2d9e30d22a10d20c47c2f5 100644 --- a/drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c +++ b/drivers/phy/amlogic/phy-meson-axg-mipi-dphy.c @@ -9,6 +9,7 @@ #include #include +#include <linux/bits.h> #include #include #include @@ -250,7 +251,7 @@ static int phy_meson_axg_mipi_dphy_power_on(struct phy *phy) (DIV_ROUND_UP(priv->config.clk_zero, temp) << 16) | (DIV_ROUND_UP(priv->config.clk_prepare, temp) << 24)); regmap_write(priv->regmap, MIPI_DSI_CLK_TIM1, - DIV_ROUND_UP(priv->config.clk_pre, temp)); + DIV_ROUND_UP(priv->config.clk_pre, BITS_PER_BYTE)); regmap_write(priv->regmap, MIPI_DSI_HS_TIM, DIV_ROUND_UP(priv->config.hs_exit, temp) | diff --git a/drivers/phy/broadcom/Kconfig b/drivers/phy/broadcom/Kconfig index f81e237420799661a2d3a11b6b64a65c640c84d3..849c4204f550691f5fb66cf9afceb80b0c522d4c 100644 --- a/drivers/phy/broadcom/Kconfig +++ b/drivers/phy/broadcom/Kconfig @@ -97,8 +97,7 @@ config PHY_BRCM_USB depends on OF select GENERIC_PHY select SOC_BRCMSTB if ARCH_BRCMSTB - default ARCH_BCM4908 - default ARCH_BRCMSTB + default ARCH_BCM4908 || ARCH_BRCMSTB help Enable this to support the Broadcom STB USB PHY. This driver is required by the USB XHCI, EHCI and OHCI diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c index 116fb23aebd9966587d0c3aca4db24b66235662f..0f1deb6e0eabf2fdf30999d649889d842402e651 100644 --- a/drivers/phy/broadcom/phy-brcm-usb.c +++ b/drivers/phy/broadcom/phy-brcm-usb.c @@ -18,6 +18,7 @@ #include #include #include +#include <linux/suspend.h> #include "phy-brcm-usb-init.h" @@ -70,12 +71,35 @@ struct brcm_usb_phy_data { int init_count; int wake_irq; struct brcm_usb_phy phys[BRCM_USB_PHY_ID_MAX]; + struct notifier_block pm_notifier; + bool pm_active; }; static s8 *node_reg_names[BRCM_REGS_MAX] = { "crtl", "xhci_ec", "xhci_gbl", "usb_phy", "usb_mdio", "bdc_ec" }; +static int brcm_pm_notifier(struct notifier_block *notifier, + unsigned long pm_event, + void *unused) +{ + struct brcm_usb_phy_data *priv = + container_of(notifier, struct brcm_usb_phy_data, pm_notifier); + + switch (pm_event) { + case PM_HIBERNATION_PREPARE: + case PM_SUSPEND_PREPARE: + priv->pm_active = true; + break; + case PM_POST_RESTORE: + case PM_POST_HIBERNATION: + case PM_POST_SUSPEND: + priv->pm_active = false; + break; + } + return NOTIFY_DONE; +} + static irqreturn_t brcm_usb_phy_wake_isr(int irq, void *dev_id) { struct phy *gphy = dev_id; @@ -91,6 +115,9 @@ static int brcm_usb_phy_init(struct phy *gphy) struct brcm_usb_phy_data *priv = container_of(phy, struct brcm_usb_phy_data, phys[phy->id]); + if (priv->pm_active) + return 0; + /* * Use a lock to make sure a second caller waits until * the base phy is inited before using it.
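/*
 * Aside: a minimal, self-contained sketch of the PM-notifier guard the
 * hunks above and below wire into this driver. All foo_* names are
 * hypothetical; only the pattern is taken from the patch: latch a flag
 * at PM_SUSPEND_PREPARE so the phy init/exit ops become no-ops until
 * the matching PM_POST_* event fires after resume.
 */
#include <linux/kernel.h>
#include <linux/notifier.h>
#include <linux/suspend.h>

struct foo_priv {
	struct notifier_block pm_notifier;
	bool pm_active;
};

static int foo_pm_notifier(struct notifier_block *nb,
			   unsigned long pm_event, void *unused)
{
	struct foo_priv *priv = container_of(nb, struct foo_priv, pm_notifier);

	switch (pm_event) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
		priv->pm_active = true;		/* veto init/exit from now on */
		break;
	case PM_POST_RESTORE:
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
		priv->pm_active = false;	/* back to normal operation */
		break;
	}
	return NOTIFY_DONE;
}

static int foo_phy_init(struct foo_priv *priv)
{
	if (priv->pm_active)	/* system transition owns the hardware */
		return 0;
	/* ... real hardware init would go here ... */
	return 0;
}

/*
 * Probe would do: priv->pm_notifier.notifier_call = foo_pm_notifier;
 * register_pm_notifier(&priv->pm_notifier); remove() pairs that with
 * unregister_pm_notifier(&priv->pm_notifier), as the hunks below show.
 */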
@@ -120,6 +147,9 @@ static int brcm_usb_phy_exit(struct phy *gphy) struct brcm_usb_phy_data *priv = container_of(phy, struct brcm_usb_phy_data, phys[phy->id]); + if (priv->pm_active) + return 0; + dev_dbg(&gphy->dev, "EXIT\n"); if (phy->id == BRCM_USB_PHY_2_0) brcm_usb_uninit_eohci(&priv->ini); @@ -488,6 +518,9 @@ static int brcm_usb_phy_probe(struct platform_device *pdev) if (err) return err; + priv->pm_notifier.notifier_call = brcm_pm_notifier; + register_pm_notifier(&priv->pm_notifier); + mutex_init(&priv->mutex); /* make sure invert settings are correct */ @@ -528,7 +561,10 @@ static int brcm_usb_phy_probe(struct platform_device *pdev) static int brcm_usb_phy_remove(struct platform_device *pdev) { + struct brcm_usb_phy_data *priv = dev_get_drvdata(&pdev->dev); + sysfs_remove_group(&pdev->dev.kobj, &brcm_usb_phy_group); + unregister_pm_notifier(&priv->pm_notifier); return 0; } @@ -539,6 +575,7 @@ static int brcm_usb_phy_suspend(struct device *dev) struct brcm_usb_phy_data *priv = dev_get_drvdata(dev); if (priv->init_count) { + dev_dbg(dev, "SUSPEND\n"); priv->ini.wake_enabled = device_may_wakeup(dev); if (priv->phys[BRCM_USB_PHY_3_0].inited) brcm_usb_uninit_xhci(&priv->ini); @@ -578,6 +615,7 @@ static int brcm_usb_phy_resume(struct device *dev) * Uninitialize anything that wasn't previously initialized. */ if (priv->init_count) { + dev_dbg(dev, "RESUME\n"); if (priv->wake_irq >= 0) disable_irq_wake(priv->wake_irq); brcm_usb_init_common(&priv->ini); diff --git a/drivers/phy/cadence/phy-cadence-sierra.c b/drivers/phy/cadence/phy-cadence-sierra.c index da24acd266666a1fc0f03e8c623e08017ffaae9a..e265647e29a249f0336c047fe58b410885c57b3a 100644 --- a/drivers/phy/cadence/phy-cadence-sierra.c +++ b/drivers/phy/cadence/phy-cadence-sierra.c @@ -1338,7 +1338,7 @@ static int cdns_sierra_phy_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; const struct cdns_sierra_data *data; unsigned int id_value; - int i, ret, node = 0; + int ret, node = 0; void __iomem *base; struct device_node *dn = dev->of_node, *child; @@ -1416,7 +1416,8 @@ static int cdns_sierra_phy_probe(struct platform_device *pdev) dev_err(dev, "failed to get reset %s\n", child->full_name); ret = PTR_ERR(sp->phys[node].lnk_rst); - goto put_child2; + of_node_put(child); + goto put_control; } if (!sp->autoconf) { @@ -1424,7 +1425,9 @@ static int cdns_sierra_phy_probe(struct platform_device *pdev) if (ret) { dev_err(dev, "missing property in node %s\n", child->name); - goto put_child; + of_node_put(child); + reset_control_put(sp->phys[node].lnk_rst); + goto put_control; } } @@ -1434,7 +1437,9 @@ static int cdns_sierra_phy_probe(struct platform_device *pdev) if (IS_ERR(gphy)) { ret = PTR_ERR(gphy); - goto put_child; + of_node_put(child); + reset_control_put(sp->phys[node].lnk_rst); + goto put_control; } sp->phys[node].phy = gphy; phy_set_drvdata(gphy, &sp->phys[node]); @@ -1446,26 +1451,28 @@ static int cdns_sierra_phy_probe(struct platform_device *pdev) if (sp->num_lanes > SIERRA_MAX_LANES) { ret = -EINVAL; dev_err(dev, "Invalid lane configuration\n"); - goto put_child2; + goto put_control; } /* If more than one subnode, configure the PHY as multilink */ if (!sp->autoconf && sp->nsubnodes > 1) { ret = cdns_sierra_phy_configure_multilink(sp); if (ret) - goto put_child2; + goto put_control; } pm_runtime_enable(dev); phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate); - return PTR_ERR_OR_ZERO(phy_provider); - -put_child: - node++; -put_child2: - for (i = 0; i < node; i++) - 
reset_control_put(sp->phys[i].lnk_rst); - of_node_put(child); + if (IS_ERR(phy_provider)) { + ret = PTR_ERR(phy_provider); + goto put_control; + } + + return 0; + +put_control: + while (--node >= 0) + reset_control_put(sp->phys[node].lnk_rst); clk_disable: cdns_sierra_phy_disable_clocks(sp); reset_control_assert(sp->apb_rst); diff --git a/drivers/phy/mediatek/phy-mtk-tphy.c b/drivers/phy/mediatek/phy-mtk-tphy.c index 6d307102f4f6c17603cd9291654f1a06b37828a4..8ee7682b8e93eb86346d1e55e1f6963a5f8f77d0 100644 --- a/drivers/phy/mediatek/phy-mtk-tphy.c +++ b/drivers/phy/mediatek/phy-mtk-tphy.c @@ -992,7 +992,7 @@ static int phy_efuse_get(struct mtk_tphy *tphy, struct mtk_phy_instance *instanc /* no efuse, ignore it */ if (!instance->efuse_intr && !instance->efuse_rx_imp && - !instance->efuse_rx_imp) { + !instance->efuse_tx_imp) { dev_warn(dev, "no u3 intr efuse, but dts enable it\n"); instance->efuse_sw_en = 0; break; diff --git a/drivers/phy/phy-core-mipi-dphy.c b/drivers/phy/phy-core-mipi-dphy.c index 288c9c67aa74872328f48961b247a8c298bc3278..ccb4045685cdd674fed7555bc7be851e90aa96e0 100644 --- a/drivers/phy/phy-core-mipi-dphy.c +++ b/drivers/phy/phy-core-mipi-dphy.c @@ -36,7 +36,7 @@ int phy_mipi_dphy_get_default_config(unsigned long pixel_clock, cfg->clk_miss = 0; cfg->clk_post = 60000 + 52 * ui; - cfg->clk_pre = 8000; + cfg->clk_pre = 8; cfg->clk_prepare = 38000; cfg->clk_settle = 95000; cfg->clk_term_en = 0; @@ -97,7 +97,7 @@ int phy_mipi_dphy_config_validate(struct phy_configure_opts_mipi_dphy *cfg) if (cfg->clk_post < (60000 + 52 * ui)) return -EINVAL; - if (cfg->clk_pre < 8000) + if (cfg->clk_pre < 8) return -EINVAL; if (cfg->clk_prepare < 38000 || cfg->clk_prepare > 95000) diff --git a/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c b/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c index 347dc79a18c18a6f235bc47c89b4b861cdd0c60a..630e01b5c19b9543d1f9afb8af4216dfad4b40c3 100644 --- a/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c +++ b/drivers/phy/rockchip/phy-rockchip-inno-dsidphy.c @@ -5,6 +5,7 @@ * Author: Wyon Bi */ +#include #include #include #include @@ -364,7 +365,7 @@ static void inno_dsidphy_mipi_mode_enable(struct inno_dsidphy *inno) * The value of counter for HS Tclk-pre * Tclk-pre = Tpin_txbyteclkhs * value */ - clk_pre = DIV_ROUND_UP(cfg->clk_pre, t_txbyteclkhs); + clk_pre = DIV_ROUND_UP(cfg->clk_pre, BITS_PER_BYTE); /* * The value of counter for HS Tlpx Time diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c index 2ce9bfd783d45cf148fdfd87f29690219d3868ea..007a23c78d56249de1b94bb3277764cc7d154023 100644 --- a/drivers/phy/st/phy-stm32-usbphyc.c +++ b/drivers/phy/st/phy-stm32-usbphyc.c @@ -304,7 +304,7 @@ static int stm32_usbphyc_pll_enable(struct stm32_usbphyc *usbphyc) ret = __stm32_usbphyc_pll_disable(usbphyc); if (ret) - return ret; + goto dec_n_pll_cons; } ret = stm32_usbphyc_regulators_enable(usbphyc); diff --git a/drivers/phy/ti/phy-j721e-wiz.c b/drivers/phy/ti/phy-j721e-wiz.c index b3384c31637ae7e59c04b17a19f988da877cbd27..da546c35d1d5ca872f4038d484feb7f58e8e8832 100644 --- a/drivers/phy/ti/phy-j721e-wiz.c +++ b/drivers/phy/ti/phy-j721e-wiz.c @@ -233,6 +233,7 @@ static const struct clk_div_table clk_div_table[] = { { .val = 1, .div = 2, }, { .val = 2, .div = 4, }, { .val = 3, .div = 8, }, + { /* sentinel */ }, }; static const struct wiz_clk_div_sel clk_div_sel[] = { diff --git a/drivers/phy/xilinx/phy-zynqmp.c b/drivers/phy/xilinx/phy-zynqmp.c index f478d8a17115b13e998692a2840e5bdca00e29ad..9be9535ad7ab7ec67daeffe82e63efd77f8a7c3e 
100644 --- a/drivers/phy/xilinx/phy-zynqmp.c +++ b/drivers/phy/xilinx/phy-zynqmp.c @@ -134,7 +134,8 @@ #define PROT_BUS_WIDTH_10 0x0 #define PROT_BUS_WIDTH_20 0x1 #define PROT_BUS_WIDTH_40 0x2 -#define PROT_BUS_WIDTH_SHIFT 2 +#define PROT_BUS_WIDTH_SHIFT(n) ((n) * 2) +#define PROT_BUS_WIDTH_MASK(n) GENMASK((n) * 2 + 1, (n) * 2) /* Number of GT lanes */ #define NUM_LANES 4 @@ -445,12 +446,12 @@ static void xpsgtr_phy_init_sata(struct xpsgtr_phy *gtr_phy) static void xpsgtr_phy_init_sgmii(struct xpsgtr_phy *gtr_phy) { struct xpsgtr_dev *gtr_dev = gtr_phy->dev; + u32 mask = PROT_BUS_WIDTH_MASK(gtr_phy->lane); + u32 val = PROT_BUS_WIDTH_10 << PROT_BUS_WIDTH_SHIFT(gtr_phy->lane); /* Set SGMII protocol TX and RX bus width to 10 bits. */ - xpsgtr_write(gtr_dev, TX_PROT_BUS_WIDTH, - PROT_BUS_WIDTH_10 << (gtr_phy->lane * PROT_BUS_WIDTH_SHIFT)); - xpsgtr_write(gtr_dev, RX_PROT_BUS_WIDTH, - PROT_BUS_WIDTH_10 << (gtr_phy->lane * PROT_BUS_WIDTH_SHIFT)); + xpsgtr_clr_set(gtr_dev, TX_PROT_BUS_WIDTH, mask, val); + xpsgtr_clr_set(gtr_dev, RX_PROT_BUS_WIDTH, mask, val); xpsgtr_bypass_scrambler_8b10b(gtr_phy); } diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile index 08c364d611f5ae3ead1a6cef898067639cfd4895..f64d29f614ec77d4a3262cc1e7c97c837adf3f23 100644 --- a/drivers/pinctrl/Makefile +++ b/drivers/pinctrl/Makefile @@ -42,9 +42,9 @@ obj-$(CONFIG_PINCTRL_PISTACHIO) += pinctrl-pistachio.o obj-$(CONFIG_PINCTRL_RK805) += pinctrl-rk805.o obj-$(CONFIG_PINCTRL_ROCKCHIP) += pinctrl-rockchip.o obj-$(CONFIG_PINCTRL_SINGLE) += pinctrl-single.o +obj-$(CONFIG_PINCTRL_ST) += pinctrl-st.o obj-$(CONFIG_PINCTRL_STARFIVE) += pinctrl-starfive.o obj-$(CONFIG_PINCTRL_STMFX) += pinctrl-stmfx.o -obj-$(CONFIG_PINCTRL_ST) += pinctrl-st.o obj-$(CONFIG_PINCTRL_SX150X) += pinctrl-sx150x.o obj-$(CONFIG_PINCTRL_TB10X) += pinctrl-tb10x.o obj-$(CONFIG_PINCTRL_THUNDERBAY) += pinctrl-thunderbay.o diff --git a/drivers/pinctrl/bcm/Kconfig b/drivers/pinctrl/bcm/Kconfig index 5123f4c33854bf74f911ff14173a9c6ba7630f5c..ac1e400bbbac559fd97a76c812c4c758062bb74d 100644 --- a/drivers/pinctrl/bcm/Kconfig +++ b/drivers/pinctrl/bcm/Kconfig @@ -35,6 +35,7 @@ config PINCTRL_BCM63XX select PINCONF select GENERIC_PINCONF select GPIOLIB + select REGMAP select GPIO_REGMAP config PINCTRL_BCM6318 diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c index c4ebfa852b4249f7edbd14cc0b1256112815fed2..47e433e09c5ce319e97fe1384941d278c9d435ba 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c @@ -1269,16 +1269,18 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) sizeof(*girq->parents), GFP_KERNEL); if (!girq->parents) { - pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range); - return -ENOMEM; + err = -ENOMEM; + goto out_remove; } if (is_7211) { pc->wake_irq = devm_kcalloc(dev, BCM2835_NUM_IRQS, sizeof(*pc->wake_irq), GFP_KERNEL); - if (!pc->wake_irq) - return -ENOMEM; + if (!pc->wake_irq) { + err = -ENOMEM; + goto out_remove; + } } /* @@ -1306,8 +1308,10 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) len = strlen(dev_name(pc->dev)) + 16; name = devm_kzalloc(pc->dev, len, GFP_KERNEL); - if (!name) - return -ENOMEM; + if (!name) { + err = -ENOMEM; + goto out_remove; + } snprintf(name, len, "%s:bank%d", dev_name(pc->dev), i); @@ -1326,11 +1330,14 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) err = gpiochip_add_data(&pc->gpio_chip, pc); if (err) { dev_err(dev, "could not add GPIO chip\n"); - 
pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range); - return err; + goto out_remove; } return 0; + +out_remove: + pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range); + return err; } static struct platform_driver bcm2835_pinctrl_driver = { diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c index abffda1fd51eb18517e3068cf1d699dd0cab08c2..1d58182690767b694144367616eac428ef438f9d 100644 --- a/drivers/pinctrl/intel/pinctrl-cherryview.c +++ b/drivers/pinctrl/intel/pinctrl-cherryview.c @@ -1471,8 +1471,9 @@ static void chv_gpio_irq_handler(struct irq_desc *desc) offset = cctx->intr_lines[intr_line]; if (offset == CHV_INVALID_HWIRQ) { - dev_err(dev, "interrupt on unused interrupt line %u\n", intr_line); - continue; + dev_warn_once(dev, "interrupt on unmapped interrupt line %u\n", intr_line); + /* Some boards expect hwirq 0 to trigger in this case */ + offset = 0; } generic_handle_domain_irq(gc->irq.domain, offset); diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 85750974d182522d7d3dd4f5ad4a8bcbd9784f8a..826d494f3cc66e66419e9d49f36de4c7e530a2a2 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -451,8 +451,8 @@ static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) value &= ~PADCFG0_PMODE_MASK; value |= PADCFG0_PMODE_GPIO; - /* Disable input and output buffers */ - value |= PADCFG0_GPIORXDIS; + /* Disable TX buffer and enable RX (this will be input) */ + value &= ~PADCFG0_GPIORXDIS; value |= PADCFG0_GPIOTXDIS; /* Disable SCI/SMI/NMI generation */ @@ -497,9 +497,6 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, intel_gpio_set_gpio_mode(padcfg0); - /* Disable TX buffer and enable RX (this will be input) */ - __intel_gpio_set_direction(padcfg0, true); - raw_spin_unlock_irqrestore(&pctrl->lock, flags); return 0; @@ -1115,9 +1112,6 @@ static int intel_gpio_irq_type(struct irq_data *d, unsigned int type) intel_gpio_set_gpio_mode(reg); - /* Disable TX buffer and enable RX (this will be input) */ - __intel_gpio_set_direction(reg, true); - value = readl(reg); value &= ~(PADCFG0_RXEVCFG_MASK | PADCFG0_RXINV); @@ -1216,6 +1210,39 @@ static irqreturn_t intel_gpio_irq(int irq, void *data) return IRQ_RETVAL(ret); } +static void intel_gpio_irq_init(struct intel_pinctrl *pctrl) +{ + int i; + + for (i = 0; i < pctrl->ncommunities; i++) { + const struct intel_community *community; + void __iomem *base; + unsigned int gpp; + + community = &pctrl->communities[i]; + base = community->regs; + + for (gpp = 0; gpp < community->ngpps; gpp++) { + /* Mask and clear all interrupts */ + writel(0, base + community->ie_offset + gpp * 4); + writel(0xffff, base + community->is_offset + gpp * 4); + } + } +} + +static int intel_gpio_irq_init_hw(struct gpio_chip *gc) +{ + struct intel_pinctrl *pctrl = gpiochip_get_data(gc); + + /* + * Make sure the interrupt lines are in a proper state before + * further configuration. 
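+ * + * gpiolib invokes this init_hw() callback from gpiochip_add_data() before + * the irqchip and its IRQ domain are registered, so stale enable/status + * bits left set by firmware cannot deliver an interrupt while the handlers + * are still being set up.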
+ */ + intel_gpio_irq_init(pctrl); + + return 0; +} + static int intel_gpio_add_community_ranges(struct intel_pinctrl *pctrl, const struct intel_community *community) { @@ -1320,6 +1347,7 @@ static int intel_gpio_probe(struct intel_pinctrl *pctrl, int irq) girq->num_parents = 0; girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; + girq->init_hw = intel_gpio_irq_init_hw; ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl); if (ret) { @@ -1695,26 +1723,6 @@ int intel_pinctrl_suspend_noirq(struct device *dev) } EXPORT_SYMBOL_GPL(intel_pinctrl_suspend_noirq); -static void intel_gpio_irq_init(struct intel_pinctrl *pctrl) -{ - size_t i; - - for (i = 0; i < pctrl->ncommunities; i++) { - const struct intel_community *community; - void __iomem *base; - unsigned int gpp; - - community = &pctrl->communities[i]; - base = community->regs; - - for (gpp = 0; gpp < community->ngpps; gpp++) { - /* Mask and clear all interrupts */ - writel(0, base + community->ie_offset + gpp * 4); - writel(0xffff, base + community->is_offset + gpp * 4); - } - } -} - static bool intel_gpio_update_reg(void __iomem *reg, u32 mask, u32 value) { u32 curr, updated; diff --git a/drivers/pinctrl/pinctrl-microchip-sgpio.c b/drivers/pinctrl/pinctrl-microchip-sgpio.c index 8e081c90bdb2468137ff61a949e300efe15ba57a..639f1130e9892ea9996d56b47f19148463bd76ca 100644 --- a/drivers/pinctrl/pinctrl-microchip-sgpio.c +++ b/drivers/pinctrl/pinctrl-microchip-sgpio.c @@ -137,7 +137,8 @@ static inline int sgpio_addr_to_pin(struct sgpio_priv *priv, int port, int bit) static inline u32 sgpio_get_addr(struct sgpio_priv *priv, u32 rno, u32 off) { - return priv->properties->regoff[rno] + off; + return (priv->properties->regoff[rno] + off) * + regmap_get_reg_stride(priv->regs); } static u32 sgpio_readl(struct sgpio_priv *priv, u32 rno, u32 off) diff --git a/drivers/pinctrl/pinctrl-thunderbay.c b/drivers/pinctrl/pinctrl-thunderbay.c index b5b47f4dd77497370cecae0768f94e17b0f32e89..79d44bca039e1c65ecd667908463f37d12cf2a16 100644 --- a/drivers/pinctrl/pinctrl-thunderbay.c +++ b/drivers/pinctrl/pinctrl-thunderbay.c @@ -773,63 +773,42 @@ static int thunderbay_build_groups(struct thunderbay_pinctrl *tpc) static int thunderbay_add_functions(struct thunderbay_pinctrl *tpc, struct function_desc *funcs) { - struct function_desc *function = funcs; int i; /* Assign the groups for each function */ - for (i = 0; i < tpc->soc->npins; i++) { - const struct pinctrl_pin_desc *pin_info = thunderbay_pins + i; - struct thunderbay_mux_desc *pin_mux = pin_info->drv_data; - - while (pin_mux->name) { - const char **grp; - int j, grp_num, match = 0; - size_t grp_size; - struct function_desc *func; - - for (j = 0; j < tpc->nfuncs; j++) { - if (!strcmp(pin_mux->name, function[j].name)) { - match = 1; - break; - } - } - - if (!match) - return -EINVAL; - - func = function + j; - grp_num = func->num_group_names; - grp_size = sizeof(*func->group_names); - - if (!func->group_names) { - func->group_names = devm_kcalloc(tpc->dev, - grp_num, - grp_size, - GFP_KERNEL); - if (!func->group_names) { - kfree(func); - return -ENOMEM; - } + for (i = 0; i < tpc->nfuncs; i++) { + struct function_desc *func = &funcs[i]; + const char **group_names; + unsigned int grp_idx = 0; + int j; + + group_names = devm_kcalloc(tpc->dev, func->num_group_names, + sizeof(*group_names), GFP_KERNEL); + if (!group_names) + return -ENOMEM; + + for (j = 0; j < tpc->soc->npins; j++) { + const struct pinctrl_pin_desc *pin_info = &thunderbay_pins[j]; + struct thunderbay_mux_desc *pin_mux; + + 
for (pin_mux = pin_info->drv_data; pin_mux->name; pin_mux++) { + if (!strcmp(pin_mux->name, func->name)) + group_names[grp_idx++] = pin_info->name; } - - grp = func->group_names; - while (*grp) - grp++; - - *grp = pin_info->name; - pin_mux++; } + + func->group_names = group_names; } /* Add all functions */ for (i = 0; i < tpc->nfuncs; i++) { pinmux_generic_add_function(tpc->pctrl, - function[i].name, - function[i].group_names, - function[i].num_group_names, - function[i].data); + funcs[i].name, + funcs[i].group_names, + funcs[i].num_group_names, + funcs[i].data); } - kfree(function); + kfree(funcs); return 0; } @@ -839,27 +818,30 @@ static int thunderbay_build_functions(struct thunderbay_pinctrl *tpc) void *ptr; int pin; - /* Total number of functions is unknown at this point. Allocate first. */ + /* + * Allocate maximum possible number of functions. Assume every pin + * is part of 8 (the hw maximum) globally unique muxes. + */ tpc->nfuncs = 0; thunderbay_funcs = kcalloc(tpc->soc->npins * 8, sizeof(*thunderbay_funcs), GFP_KERNEL); if (!thunderbay_funcs) return -ENOMEM; - /* Find total number of functions and each's properties */ + /* Set up one function for each unique mux */ for (pin = 0; pin < tpc->soc->npins; pin++) { const struct pinctrl_pin_desc *pin_info = thunderbay_pins + pin; - struct thunderbay_mux_desc *pin_mux = pin_info->drv_data; + struct thunderbay_mux_desc *pin_mux; - while (pin_mux->name) { - struct function_desc *func = thunderbay_funcs; + for (pin_mux = pin_info->drv_data; pin_mux->name; pin_mux++) { + struct function_desc *func; - while (func->name) { + /* Check if we already have a function for this mux */ + for (func = thunderbay_funcs; func->name; func++) { if (!strcmp(pin_mux->name, func->name)) { func->num_group_names++; break; } - func++; } if (!func->name) { @@ -868,8 +850,6 @@ static int thunderbay_build_functions(struct thunderbay_pinctrl *tpc) func->data = (int *)&pin_mux->mode; tpc->nfuncs++; } - - pin_mux++; } } diff --git a/drivers/pinctrl/pinctrl-zynqmp.c b/drivers/pinctrl/pinctrl-zynqmp.c index 42da6bd399ee78950bc42f5a85d9e8d465a5ed42..e14012209992f997d086e15fc2462b08bbd6e938 100644 --- a/drivers/pinctrl/pinctrl-zynqmp.c +++ b/drivers/pinctrl/pinctrl-zynqmp.c @@ -809,7 +809,6 @@ static int zynqmp_pinctrl_prepare_pin_desc(struct device *dev, unsigned int *npins) { struct pinctrl_pin_desc *pins, *pin; - char **pin_names; int ret; int i; @@ -821,14 +820,13 @@ static int zynqmp_pinctrl_prepare_pin_desc(struct device *dev, if (!pins) return -ENOMEM; - pin_names = devm_kasprintf_strarray(dev, ZYNQMP_PIN_PREFIX, *npins); - if (IS_ERR(pin_names)) - return PTR_ERR(pin_names); - for (i = 0; i < *npins; i++) { pin = &pins[i]; pin->number = i; - pin->name = pin_names[i]; + pin->name = devm_kasprintf(dev, GFP_KERNEL, "%s%d", + ZYNQMP_PIN_PREFIX, i); + if (!pin->name) + return -ENOMEM; } *zynqmp_pins = pins; diff --git a/drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c b/drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c index ce1917e230f411999e4c3d06c435d2e8eb5505ca..152b71226a807f8c559e5a36e619de9aa75f17c2 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c +++ b/drivers/pinctrl/sunxi/pinctrl-sun50i-h616.c @@ -363,16 +363,16 @@ static const struct sunxi_desc_pin h616_pins[] = { SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), SUNXI_FUNCTION(0x2, "uart2"), /* CTS */ - SUNXI_FUNCTION(0x3, "i2s3"), /* DO0 */ + SUNXI_FUNCTION(0x3, "i2s3_dout0"), /* DO0 */ SUNXI_FUNCTION(0x4, "spi1"), /* MISO */ - SUNXI_FUNCTION(0x5, "i2s3"), /* DI1 */ + SUNXI_FUNCTION(0x5,
"i2s3_din1"), /* DI1 */ SUNXI_FUNCTION_IRQ_BANK(0x6, 6, 8)), /* PH_EINT8 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(H, 9), SUNXI_FUNCTION(0x0, "gpio_in"), SUNXI_FUNCTION(0x1, "gpio_out"), - SUNXI_FUNCTION(0x3, "i2s3"), /* DI0 */ + SUNXI_FUNCTION(0x3, "i2s3_din0"), /* DI0 */ SUNXI_FUNCTION(0x4, "spi1"), /* CS1 */ - SUNXI_FUNCTION(0x3, "i2s3"), /* DO1 */ + SUNXI_FUNCTION(0x5, "i2s3_dout1"), /* DO1 */ SUNXI_FUNCTION_IRQ_BANK(0x6, 6, 9)), /* PH_EINT9 */ SUNXI_PIN(SUNXI_PINCTRL_PIN(H, 10), SUNXI_FUNCTION(0x0, "gpio_in"), diff --git a/drivers/platform/surface/Kconfig b/drivers/platform/surface/Kconfig index 5f0578e25f718f141b11946748e4b68106a9d032..463f1ec5c14e9fa95ac6058f6c4fc5fcc466b121 100644 --- a/drivers/platform/surface/Kconfig +++ b/drivers/platform/surface/Kconfig @@ -5,6 +5,7 @@ menuconfig SURFACE_PLATFORMS bool "Microsoft Surface Platform-Specific Device Drivers" + depends on ARM64 || X86 || COMPILE_TEST default y help Say Y here to get to see options for platform-specific device drivers diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index f794343d6aaae157778bdbeaf1a176b72e3cdbb9..4c72ba68b315bbeea909a25f46d278e63efb9ea1 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -124,9 +124,10 @@ struct amd_pmc_dev { u32 cpu_id; u32 active_ips; /* SMU version information */ - u16 major; - u16 minor; - u16 rev; + u8 smu_program; + u8 major; + u8 minor; + u8 rev; struct device *dev; struct pci_dev *rdev; struct mutex lock; /* generic mutex lock */ @@ -180,11 +181,13 @@ static int amd_pmc_get_smu_version(struct amd_pmc_dev *dev) if (rc) return rc; - dev->major = (val >> 16) & GENMASK(15, 0); + dev->smu_program = (val >> 24) & GENMASK(7, 0); + dev->major = (val >> 16) & GENMASK(7, 0); dev->minor = (val >> 8) & GENMASK(7, 0); dev->rev = (val >> 0) & GENMASK(7, 0); - dev_dbg(dev->dev, "SMU version is %u.%u.%u\n", dev->major, dev->minor, dev->rev); + dev_dbg(dev->dev, "SMU program %u version is %u.%u.%u\n", + dev->smu_program, dev->major, dev->minor, dev->rev); return 0; } @@ -226,7 +229,7 @@ static int amd_pmc_stb_debugfs_release(struct inode *inode, struct file *filp) return 0; } -const struct file_operations amd_pmc_stb_debugfs_fops = { +static const struct file_operations amd_pmc_stb_debugfs_fops = { .owner = THIS_MODULE, .open = amd_pmc_stb_debugfs_open, .read = amd_pmc_stb_debugfs_read, diff --git a/drivers/platform/x86/asus-tf103c-dock.c b/drivers/platform/x86/asus-tf103c-dock.c index d4ef8f362ee6816499ec0b563bd98873605d5f6d..6fd0c9fea82da144f29b5566cfcc67893e4f06c5 100644 --- a/drivers/platform/x86/asus-tf103c-dock.c +++ b/drivers/platform/x86/asus-tf103c-dock.c @@ -250,7 +250,7 @@ static int tf103c_dock_hid_raw_request(struct hid_device *hid, u8 reportnum, return 0; } -struct hid_ll_driver tf103c_dock_hid_ll_driver = { +static struct hid_ll_driver tf103c_dock_hid_ll_driver = { .parse = tf103c_dock_hid_parse, .start = tf103c_dock_hid_start, .stop = tf103c_dock_hid_stop, @@ -921,7 +921,7 @@ static int __maybe_unused tf103c_dock_resume(struct device *dev) return 0; } -SIMPLE_DEV_PM_OPS(tf103c_dock_pm_ops, tf103c_dock_suspend, tf103c_dock_resume); +static SIMPLE_DEV_PM_OPS(tf103c_dock_pm_ops, tf103c_dock_suspend, tf103c_dock_resume); static const struct acpi_device_id tf103c_dock_acpi_match[] = { {"NPCE69A"}, diff --git a/drivers/platform/x86/intel/crystal_cove_charger.c b/drivers/platform/x86/intel/crystal_cove_charger.c index 0374bc742513abf56b2e2939638ca1b7e749b5f0..e4299cfa22054510c298f5f1af890b0b0e92a6cf 100644 --- 
a/drivers/platform/x86/intel/crystal_cove_charger.c +++ b/drivers/platform/x86/intel/crystal_cove_charger.c @@ -17,6 +17,7 @@ #include #define CHGRIRQ_REG 0x0a +#define MCHGRIRQ_REG 0x17 struct crystal_cove_charger_data { struct mutex buslock; /* irq_bus_lock */ @@ -25,8 +26,8 @@ struct crystal_cove_charger_data { struct irq_domain *irq_domain; int irq; int charger_irq; - bool irq_enabled; - bool irq_is_enabled; + u8 mask; + u8 new_mask; }; static irqreturn_t crystal_cove_charger_irq(int irq, void *data) @@ -53,13 +54,9 @@ static void crystal_cove_charger_irq_bus_sync_unlock(struct irq_data *data) { struct crystal_cove_charger_data *charger = irq_data_get_irq_chip_data(data); - if (charger->irq_is_enabled != charger->irq_enabled) { - if (charger->irq_enabled) - enable_irq(charger->irq); - else - disable_irq(charger->irq); - - charger->irq_is_enabled = charger->irq_enabled; + if (charger->mask != charger->new_mask) { + regmap_write(charger->regmap, MCHGRIRQ_REG, charger->new_mask); + charger->mask = charger->new_mask; } mutex_unlock(&charger->buslock); @@ -69,14 +66,14 @@ static void crystal_cove_charger_irq_unmask(struct irq_data *data) { struct crystal_cove_charger_data *charger = irq_data_get_irq_chip_data(data); - charger->irq_enabled = true; + charger->new_mask &= ~BIT(data->hwirq); } static void crystal_cove_charger_irq_mask(struct irq_data *data) { struct crystal_cove_charger_data *charger = irq_data_get_irq_chip_data(data); - charger->irq_enabled = false; + charger->new_mask |= BIT(data->hwirq); } static void crystal_cove_charger_rm_irq_domain(void *data) @@ -130,10 +127,13 @@ static int crystal_cove_charger_probe(struct platform_device *pdev) irq_set_nested_thread(charger->charger_irq, true); irq_set_noprobe(charger->charger_irq); + /* Mask the single 2nd level IRQ before enabling the 1st level IRQ */ + charger->mask = charger->new_mask = BIT(0); + regmap_write(charger->regmap, MCHGRIRQ_REG, charger->mask); + ret = devm_request_threaded_irq(&pdev->dev, charger->irq, NULL, crystal_cove_charger_irq, - IRQF_ONESHOT | IRQF_NO_AUTOEN, - KBUILD_MODNAME, charger); + IRQF_ONESHOT, KBUILD_MODNAME, charger); if (ret) return dev_err_probe(&pdev->dev, ret, "requesting irq\n"); diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c index c9a85eb2e8600a2d362749de6586d9c274bc35dc..e8424e70d81d2d1b425cc95d6f02b7b8189d1fa3 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c @@ -596,7 +596,10 @@ static long isst_if_def_ioctl(struct file *file, unsigned int cmd, return ret; } -static DEFINE_MUTEX(punit_misc_dev_lock); +/* Lock to prevent module registration when already opened by user space */ +static DEFINE_MUTEX(punit_misc_dev_open_lock); +/* Lock to allow one shared misc device for all ISST interfaces */ +static DEFINE_MUTEX(punit_misc_dev_reg_lock); static int misc_usage_count; static int misc_device_ret; static int misc_device_open; @@ -606,7 +609,7 @@ static int isst_if_open(struct inode *inode, struct file *file) { int i, ret = 0; /* Fail open, if a module is going away */ - mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); for (i = 0; i < ISST_IF_DEV_MAX; ++i) { struct isst_if_cmd_cb *cb = &punit_callbacks[i]; @@ -628,7 +631,7 @@ static int isst_if_open(struct inode *inode, struct file *file) } else { misc_device_open++; } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return ret; } @@ -637,7 +640,7 @@ static int isst_if_relase(struct inode *inode, struct file *f) { int i; - mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); misc_device_open--; for (i = 0; i < ISST_IF_DEV_MAX; ++i) { struct isst_if_cmd_cb *cb = &punit_callbacks[i]; @@ -645,7 +648,7 @@ static int isst_if_relase(struct inode *inode, struct file *f) if (cb->registered) module_put(cb->owner); } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return 0; } @@ -662,6 +665,43 @@ static struct miscdevice isst_if_char_driver = { .fops = &isst_if_char_driver_ops, }; +static int isst_misc_reg(void) +{ + mutex_lock(&punit_misc_dev_reg_lock); + if (misc_device_ret) + goto unlock_exit; + + if (!misc_usage_count) { + misc_device_ret = isst_if_cpu_info_init(); + if (misc_device_ret) + goto unlock_exit; + + misc_device_ret = misc_register(&isst_if_char_driver); + if (misc_device_ret) { + isst_if_cpu_info_exit(); + goto unlock_exit; + } + } + misc_usage_count++; + +unlock_exit: + mutex_unlock(&punit_misc_dev_reg_lock); + + return misc_device_ret; +} + +static void isst_misc_unreg(void) +{ + mutex_lock(&punit_misc_dev_reg_lock); + if (misc_usage_count) + misc_usage_count--; + if (!misc_usage_count && !misc_device_ret) { + misc_deregister(&isst_if_char_driver); + isst_if_cpu_info_exit(); + } + mutex_unlock(&punit_misc_dev_reg_lock); +} + /** * isst_if_cdev_register() - Register callback for IOCTL * @device_type: The device type this callback handling. @@ -679,38 +719,31 @@ static struct miscdevice isst_if_char_driver = { */ int isst_if_cdev_register(int device_type, struct isst_if_cmd_cb *cb) { - if (misc_device_ret) - return misc_device_ret; + int ret; if (device_type >= ISST_IF_DEV_MAX) return -EINVAL; - mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); + /* Device is already open, we don't want to add new callbacks */ if (misc_device_open) { - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return -EAGAIN; } - if (!misc_usage_count) { - int ret; - - misc_device_ret = misc_register(&isst_if_char_driver); - if (misc_device_ret) - goto unlock_exit; - - ret = isst_if_cpu_info_init(); - if (ret) { - misc_deregister(&isst_if_char_driver); - misc_device_ret = ret; - goto unlock_exit; - } - } memcpy(&punit_callbacks[device_type], cb, sizeof(*cb)); punit_callbacks[device_type].registered = 1; - misc_usage_count++; -unlock_exit: - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); - return misc_device_ret; + ret = isst_misc_reg(); + if (ret) { + /* + * No need for a mutex here: misc device registration failed, + * so no one can open the device yet. Hence no contention.
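+ * + * (Design note: with the old single punit_misc_dev_lock this + * registration step shared a mutex with the open()/release() paths; + * the open_lock/reg_lock split above keeps isst_misc_reg(), which can + * block in misc_register() and the cpuhp core, off the open path's + * lock entirely.)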
+ */ + punit_callbacks[device_type].registered = 0; + return ret; + } + return 0; } EXPORT_SYMBOL_GPL(isst_if_cdev_register); @@ -725,16 +758,12 @@ EXPORT_SYMBOL_GPL(isst_if_cdev_register); */ void isst_if_cdev_unregister(int device_type) { - mutex_lock(&punit_misc_dev_lock); - misc_usage_count--; + isst_misc_unreg(); + mutex_lock(&punit_misc_dev_open_lock); punit_callbacks[device_type].registered = 0; if (device_type == ISST_IF_DEV_MBOX) isst_delete_hash(); - if (!misc_usage_count && !misc_device_ret) { - misc_deregister(&isst_if_char_driver); - isst_if_cpu_info_exit(); - } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); } EXPORT_SYMBOL_GPL(isst_if_cdev_unregister); diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 82fa6148216c7118ca8f3f9673e7b8549f1cc1b3..bd045486b933fd26d345cf93abcdebdeb6fa2660 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -880,14 +880,14 @@ static int dispatch_proc_show(struct seq_file *m, void *v) static int dispatch_proc_open(struct inode *inode, struct file *file) { - return single_open(file, dispatch_proc_show, PDE_DATA(inode)); + return single_open(file, dispatch_proc_show, pde_data(inode)); } static ssize_t dispatch_proc_write(struct file *file, const char __user *userbuf, size_t count, loff_t *pos) { - struct ibm_struct *ibm = PDE_DATA(file_inode(file)); + struct ibm_struct *ibm = pde_data(file_inode(file)); char *kernbuf; int ret; @@ -8679,9 +8679,10 @@ static const struct attribute_group fan_driver_attr_group = { .attrs = fan_driver_attributes, }; -#define TPACPI_FAN_Q1 0x0001 /* Unitialized HFSP */ -#define TPACPI_FAN_2FAN 0x0002 /* EC 0x31 bit 0 selects fan2 */ -#define TPACPI_FAN_2CTL 0x0004 /* selects fan2 control */ +#define TPACPI_FAN_Q1 0x0001 /* Uninitialized HFSP */ +#define TPACPI_FAN_2FAN 0x0002 /* EC 0x31 bit 0 selects fan2 */ +#define TPACPI_FAN_2CTL 0x0004 /* selects fan2 control */ +#define TPACPI_FAN_NOFAN 0x0008 /* no fan available */ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_QEC_IBM('1', 'Y', TPACPI_FAN_Q1), @@ -8702,6 +8703,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_Q_LNV3('N', '4', '0', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (4nd gen) */ TPACPI_Q_LNV3('N', '3', '0', TPACPI_FAN_2CTL), /* P15 (1st gen) / P15v (1st gen) */ TPACPI_Q_LNV3('N', '3', '2', TPACPI_FAN_2CTL), /* X1 Carbon (9th gen) */ + TPACPI_Q_LNV3('N', '1', 'O', TPACPI_FAN_NOFAN), /* X1 Tablet (2nd gen) */ }; static int __init fan_init(struct ibm_init_struct *iibm) @@ -8730,6 +8732,11 @@ static int __init fan_init(struct ibm_init_struct *iibm) quirks = tpacpi_check_quirks(fan_quirk_table, ARRAY_SIZE(fan_quirk_table)); + if (quirks & TPACPI_FAN_NOFAN) { + pr_info("No integrated ThinkPad fan available\n"); + return -ENODEV; + } + if (gfan_handle) { /* 570, 600e/x, 770e, 770x */ fan_status_access_mode = TPACPI_FAN_RD_ACPI_GFAN; @@ -10112,6 +10119,9 @@ static struct ibm_struct proxsensor_driver_data = { #define DYTC_CMD_MMC_GET 8 /* To get current MMC function and mode */ #define DYTC_CMD_RESET 0x1ff /* To reset back to default */ +#define DYTC_CMD_FUNC_CAP 3 /* To get DYTC capabilities */ +#define DYTC_FC_MMC 27 /* MMC Mode supported */ + #define DYTC_GET_FUNCTION_BIT 8 /* Bits 8-11 - function setting */ #define DYTC_GET_MODE_BIT 12 /* Bits 12-15 - mode setting */ @@ -10324,6 +10334,15 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm) if (dytc_version < 5) return -ENODEV; + 
/* Check what capabilities are supported. Currently MMC is needed */ + err = dytc_command(DYTC_CMD_FUNC_CAP, &output); + if (err) + return err; + if (!(output & BIT(DYTC_FC_MMC))) { + dbg_printk(TPACPI_DBG_INIT, " DYTC MMC mode not supported\n"); + return -ENODEV; + } + dbg_printk(TPACPI_DBG_INIT, "DYTC version %d: thermal mode available\n", dytc_version); /* diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c index 352508d30467535ba12d4826292a818db52f40d3..f113dec98e21dbd37f6a977840ebe68fb491cd30 100644 --- a/drivers/platform/x86/toshiba_acpi.c +++ b/drivers/platform/x86/toshiba_acpi.c @@ -1368,7 +1368,7 @@ static int lcd_proc_show(struct seq_file *m, void *v) static int lcd_proc_open(struct inode *inode, struct file *file) { - return single_open(file, lcd_proc_show, PDE_DATA(inode)); + return single_open(file, lcd_proc_show, pde_data(inode)); } static int set_lcd_brightness(struct toshiba_acpi_dev *dev, int value) @@ -1404,7 +1404,7 @@ static int set_lcd_status(struct backlight_device *bd) static ssize_t lcd_proc_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { - struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file)); + struct toshiba_acpi_dev *dev = pde_data(file_inode(file)); char cmd[42]; size_t len; int levels; @@ -1469,13 +1469,13 @@ static int video_proc_show(struct seq_file *m, void *v) static int video_proc_open(struct inode *inode, struct file *file) { - return single_open(file, video_proc_show, PDE_DATA(inode)); + return single_open(file, video_proc_show, pde_data(inode)); } static ssize_t video_proc_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { - struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file)); + struct toshiba_acpi_dev *dev = pde_data(file_inode(file)); char *buffer; char *cmd; int lcd_out = -1, crt_out = -1, tv_out = -1; @@ -1580,13 +1580,13 @@ static int fan_proc_show(struct seq_file *m, void *v) static int fan_proc_open(struct inode *inode, struct file *file) { - return single_open(file, fan_proc_show, PDE_DATA(inode)); + return single_open(file, fan_proc_show, pde_data(inode)); } static ssize_t fan_proc_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { - struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file)); + struct toshiba_acpi_dev *dev = pde_data(file_inode(file)); char cmd[42]; size_t len; int value; @@ -1628,13 +1628,13 @@ static int keys_proc_show(struct seq_file *m, void *v) static int keys_proc_open(struct inode *inode, struct file *file) { - return single_open(file, keys_proc_show, PDE_DATA(inode)); + return single_open(file, keys_proc_show, pde_data(inode)); } static ssize_t keys_proc_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { - struct toshiba_acpi_dev *dev = PDE_DATA(file_inode(file)); + struct toshiba_acpi_dev *dev = pde_data(file_inode(file)); char cmd[42]; size_t len; int value; diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 494f23052678691f6424e4f426e05296d619492e..bc97bfa8e8a65f548194f8028851a64151f2c8e9 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -770,6 +770,21 @@ static const struct ts_dmi_data predia_basic_data = { .properties = predia_basic_props, }; +static const struct property_entry rwc_nanote_p8_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-y", 46), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1728), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1140), + 
PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-rwc-nanote-p8.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + { } +}; + +static const struct ts_dmi_data rwc_nanote_p8_data = { + .acpi_name = "MSSL1680:00", + .properties = rwc_nanote_p8_props, +}; + static const struct property_entry schneider_sct101ctm_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 1715), PROPERTY_ENTRY_U32("touchscreen-size-y", 1140), @@ -1394,6 +1409,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "0E57"), }, }, + { + /* RWC NANOTE P8 */ + .driver_data = (void *)&rwc_nanote_p8_data, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Default string"), + DMI_MATCH(DMI_PRODUCT_NAME, "AY07J"), + DMI_MATCH(DMI_PRODUCT_SKU, "0001") + }, + }, { /* Schneider SCT101CTM */ .driver_data = (void *)&schneider_sct101ctm_data, diff --git a/drivers/platform/x86/x86-android-tablets.c b/drivers/platform/x86/x86-android-tablets.c index 3ba63ad91b28b4bc1ce4ce989a4bfe9192757075..9360a8a92486a81b4e6ff2254921bc9c2b5a796a 100644 --- a/drivers/platform/x86/x86-android-tablets.c +++ b/drivers/platform/x86/x86-android-tablets.c @@ -26,6 +26,7 @@ #include /* For gpio_get_desc() which is EXPORT_SYMBOL_GPL() */ #include "../../gpio/gpiolib.h" +#include "../../gpio/gpiolib-acpi.h" /* * Helper code to get Linux IRQ numbers given a description of the IRQ source @@ -47,7 +48,7 @@ struct x86_acpi_irq_data { int polarity; /* ACPI_ACTIVE_HIGH / ACPI_ACTIVE_LOW / ACPI_ACTIVE_BOTH */ }; -static int x86_acpi_irq_helper_gpiochip_find(struct gpio_chip *gc, void *data) +static int gpiochip_find_match_label(struct gpio_chip *gc, void *data) { return gc->label && !strcmp(gc->label, data); } @@ -73,7 +74,7 @@ static int x86_acpi_irq_helper_get(const struct x86_acpi_irq_data *data) return irq; case X86_ACPI_IRQ_TYPE_GPIOINT: /* Like acpi_dev_gpio_irq_get(), but without parsing ACPI resources */ - chip = gpiochip_find(data->chip, x86_acpi_irq_helper_gpiochip_find); + chip = gpiochip_find(data->chip, gpiochip_find_match_label); if (!chip) { pr_err("error cannot find GPIO chip %s\n", data->chip); return -ENODEV; @@ -143,14 +144,17 @@ struct x86_serdev_info { }; struct x86_dev_info { + char *invalid_aei_gpiochip; const char * const *modules; - struct gpiod_lookup_table **gpiod_lookup_tables; + struct gpiod_lookup_table * const *gpiod_lookup_tables; const struct x86_i2c_client_info *i2c_client_info; const struct platform_device_info *pdev_info; const struct x86_serdev_info *serdev_info; int i2c_client_count; int pdev_count; int serdev_count; + int (*init)(void); + void (*exit)(void); }; /* Generic / shared bq24190 settings */ @@ -187,8 +191,8 @@ static struct bq24190_platform_data bq24190_pdata = { }; static const char * const bq24190_modules[] __initconst = { - "crystal_cove_charger", /* For the bq24190 IRQ */ - "bq24190_charger", /* For the Vbus regulator for intel-int3496 */ + "intel_crystal_cove_charger", /* For the bq24190 IRQ */ + "bq24190_charger", /* For the Vbus regulator for intel-int3496 */ NULL }; @@ -302,7 +306,7 @@ static struct gpiod_lookup_table asus_me176c_goodix_gpios = { }, }; -static struct gpiod_lookup_table *asus_me176c_gpios[] = { +static struct gpiod_lookup_table * const asus_me176c_gpios[] = { &int3496_gpo2_pin22_gpios, &asus_me176c_goodix_gpios, NULL @@ -317,6 +321,7 @@ static const struct x86_dev_info asus_me176c_info __initconst = { .serdev_count = ARRAY_SIZE(asus_me176c_serdevs), .gpiod_lookup_tables = asus_me176c_gpios, .modules = 
bq24190_modules, + .invalid_aei_gpiochip = "INT33FC:02", }; /* Asus TF103C tablets have an Android factory img with everything hardcoded */ @@ -405,7 +410,7 @@ static const struct x86_i2c_client_info asus_tf103c_i2c_clients[] __initconst = }, }; -static struct gpiod_lookup_table *asus_tf103c_gpios[] = { +static struct gpiod_lookup_table * const asus_tf103c_gpios[] = { &int3496_gpo2_pin22_gpios, NULL }; @@ -417,6 +422,7 @@ static const struct x86_dev_info asus_tf103c_info __initconst = { .pdev_count = ARRAY_SIZE(int3496_pdevs), .gpiod_lookup_tables = asus_tf103c_gpios, .modules = bq24190_modules, + .invalid_aei_gpiochip = "INT33FC:02", }; /* @@ -490,6 +496,39 @@ static const struct x86_dev_info chuwi_hi8_info __initconst = { .i2c_client_count = ARRAY_SIZE(chuwi_hi8_i2c_clients), }; +#define CZC_EC_EXTRA_PORT 0x68 +#define CZC_EC_ANDROID_KEYS 0x63 + +static int __init czc_p10t_init(void) +{ + /* + * The device boots up in "Windows 7" mode, in which the home button sends a + * Windows-specific key sequence (Left Meta + D) and the second button + * sends an unknown one while also toggling the Radio Kill Switch. + * This is a surprising behavior when the second button is labeled "Back". + * + * The vendor-supplied Android-x86 build switches the device to an "Android" + * mode by writing value 0x63 to the I/O port 0x68. This seems to just + * set bit 6 on address 0x96 in the EC region; switching the bit directly + * seems to achieve the same result. It uses a "p10t_switcher" to do the + * job. It doesn't seem to be able to do anything else, and no other use + * of the port 0x68 is known. + * + * In the Android mode, the home button sends just a single scancode, + * which can be handled in Linux userspace more reasonably and the back + * button only sends a scancode without toggling the kill switch. + * The scancode can then be mapped either to Back or RF Kill functionality + * in userspace, depending on how the button is labeled on that particular + * model. + */ + outb(CZC_EC_ANDROID_KEYS, CZC_EC_EXTRA_PORT); + return 0; +} + +static const struct x86_dev_info czc_p10t __initconst = { + .init = czc_p10t_init, +}; + /* * Whitelabel (sold as various brands) TM800A550L tablets.
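The CZC quirk above amounts to a single EC write at probe time. A self-contained sketch of that idea in isolation, assuming nothing beyond port I/O; the EXAMPLE_* constants merely mirror the CZC_EC_* values and are not part of the driver:

```c
#include <linux/init.h>
#include <linux/io.h>

#define EXAMPLE_EC_CMD_PORT	0x68	/* same "extra" EC port the quirk pokes */
#define EXAMPLE_EC_ANDROID_MODE	0x63	/* value that flips the EC mode bit */

static int __init example_board_init(void)
{
	/* one-shot write; the EC latches the mode until the next reset */
	outb(EXAMPLE_EC_ANDROID_MODE, EXAMPLE_EC_CMD_PORT);
	return 0;
}
```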
* These tablet's DSDT contains a whole bunch of bogus ACPI I2C devices @@ -559,7 +598,7 @@ static struct gpiod_lookup_table whitelabel_tm800a550l_goodix_gpios = { }, }; -static struct gpiod_lookup_table *whitelabel_tm800a550l_gpios[] = { +static struct gpiod_lookup_table * const whitelabel_tm800a550l_gpios[] = { &whitelabel_tm800a550l_goodix_gpios, NULL }; @@ -641,6 +680,24 @@ static const struct dmi_system_id x86_android_tablet_ids[] __initconst = { }, .driver_data = (void *)&chuwi_hi8_info, }, + { + /* CZC P10T */ + .ident = "CZC ODEON TPC-10 (\"P10T\")", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "CZC"), + DMI_MATCH(DMI_PRODUCT_NAME, "ODEON*TPC-10"), + }, + .driver_data = (void *)&czc_p10t, + }, + { + /* A variant of CZC P10T */ + .ident = "ViewSonic ViewPad 10", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ViewSonic"), + DMI_MATCH(DMI_PRODUCT_NAME, "VPAD10"), + }, + .driver_data = (void *)&czc_p10t, + }, { /* Whitelabel (sold as various brands) TM800A550L */ .matches = { @@ -669,7 +726,8 @@ static int serdev_count; static struct i2c_client **i2c_clients; static struct platform_device **pdevs; static struct serdev_device **serdevs; -static struct gpiod_lookup_table **gpiod_lookup_tables; +static struct gpiod_lookup_table * const *gpiod_lookup_tables; +static void (*exit_handler)(void); static __init int x86_instantiate_i2c_client(const struct x86_dev_info *dev_info, int idx) @@ -787,6 +845,9 @@ static void x86_android_tablet_cleanup(void) kfree(i2c_clients); + if (exit_handler) + exit_handler(); + for (i = 0; gpiod_lookup_tables && gpiod_lookup_tables[i]; i++) gpiod_remove_lookup_table(gpiod_lookup_tables[i]); } @@ -795,6 +856,7 @@ static __init int x86_android_tablet_init(void) { const struct x86_dev_info *dev_info; const struct dmi_system_id *id; + struct gpio_chip *chip; int i, ret = 0; id = dmi_first_match(x86_android_tablet_ids); @@ -803,6 +865,20 @@ static __init int x86_android_tablet_init(void) dev_info = id->driver_data; + /* + * The broken DSDTs on these devices often also include broken + * _AEI (ACPI Event Interrupt) handlers, disable these. + */ + if (dev_info->invalid_aei_gpiochip) { + chip = gpiochip_find(dev_info->invalid_aei_gpiochip, + gpiochip_find_match_label); + if (!chip) { + pr_err("error cannot find GPIO chip %s\n", dev_info->invalid_aei_gpiochip); + return -ENODEV; + } + acpi_gpiochip_free_interrupts(chip); + } + /* * Since this runs from module_init() it cannot use -EPROBE_DEFER, * instead pre-load any modules which are listed as requirements. 
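x86_android_tablet_init() above pairs gpiochip_find() with a label-match callback to locate the chip named by ->invalid_aei_gpiochip, then detaches the broken _AEI event handlers the DSDT wired to it. A condensed sketch of that lookup; it assumes the gpiolib-acpi internals this driver already includes, and the example_* names are illustrative:

```c
#include <linux/errno.h>
#include <linux/gpio/driver.h>
#include <linux/string.h>
#include "gpiolib-acpi.h"	/* driver-internal header, as in the patch */

static int example_match_label(struct gpio_chip *gc, void *data)
{
	return gc->label && !strcmp(gc->label, data);
}

static int example_disable_broken_aei(char *label)
{
	struct gpio_chip *chip;

	chip = gpiochip_find(label, example_match_label);
	if (!chip)
		return -ENODEV;

	/* drop the ACPI event handlers the buggy DSDT attached here */
	acpi_gpiochip_free_interrupts(chip);
	return 0;
}
```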
@@ -814,6 +890,15 @@ static __init int x86_android_tablet_init(void) for (i = 0; gpiod_lookup_tables && gpiod_lookup_tables[i]; i++) gpiod_add_lookup_table(gpiod_lookup_tables[i]); + if (dev_info->init) { + ret = dev_info->init(); + if (ret < 0) { + x86_android_tablet_cleanup(); + return ret; + } + exit_handler = dev_info->exit; + } + i2c_clients = kcalloc(dev_info->i2c_client_count, sizeof(*i2c_clients), GFP_KERNEL); if (!i2c_clients) { x86_android_tablet_cleanup(); @@ -865,6 +950,6 @@ static __init int x86_android_tablet_init(void) module_init(x86_android_tablet_init); module_exit(x86_android_tablet_cleanup); -MODULE_AUTHOR("Hans de Goede "); MODULE_DESCRIPTION("X86 Android tablets DSDT fixups driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/pnp/isapnp/proc.c b/drivers/pnp/isapnp/proc.c index 1ae458c02656cb152ec1cd7fea49d1f0d108446e..55ae72a2818bbfa79230bea808dfd86c4bdfb2e1 100644 --- a/drivers/pnp/isapnp/proc.c +++ b/drivers/pnp/isapnp/proc.c @@ -22,7 +22,7 @@ static loff_t isapnp_proc_bus_lseek(struct file *file, loff_t off, int whence) static ssize_t isapnp_proc_bus_read(struct file *file, char __user * buf, size_t nbytes, loff_t * ppos) { - struct pnp_dev *dev = PDE_DATA(file_inode(file)); + struct pnp_dev *dev = pde_data(file_inode(file)); int pos = *ppos; int cnt, size = 256; diff --git a/drivers/pnp/pnpbios/proc.c b/drivers/pnp/pnpbios/proc.c index a806830e3a407f294a4764bb40f0cfc934b8ed18..0f0d819b157fd8c8fd7769f6cd7818fc01e8ece4 100644 --- a/drivers/pnp/pnpbios/proc.c +++ b/drivers/pnp/pnpbios/proc.c @@ -173,13 +173,13 @@ static int pnpbios_proc_show(struct seq_file *m, void *v) static int pnpbios_proc_open(struct inode *inode, struct file *file) { - return single_open(file, pnpbios_proc_show, PDE_DATA(inode)); + return single_open(file, pnpbios_proc_show, pde_data(inode)); } static ssize_t pnpbios_proc_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { - void *data = PDE_DATA(file_inode(file)); + void *data = pde_data(file_inode(file)); struct pnp_bios_node *node; int boot = (long)data >> 8; u8 nodenum = (long)data; diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 86aa4141efa9242c93de70a294afe132eb7c4e2b..d2553970a67ba73e1a7f2d75b3ca49d02f0317ea 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -6014,9 +6014,8 @@ core_initcall(regulator_init); static int regulator_late_cleanup(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); - const struct regulator_ops *ops = rdev->desc->ops; struct regulation_constraints *c = rdev->constraints; - int enabled, ret; + int ret; if (c && c->always_on) return 0; @@ -6029,14 +6028,8 @@ static int regulator_late_cleanup(struct device *dev, void *data) if (rdev->use_count) goto unlock; - /* If we can't read the status assume it's always on. */ - if (ops->is_enabled) - enabled = ops->is_enabled(rdev); - else - enabled = 1; - - /* But if reading the status failed, assume that it's off. */ - if (enabled <= 0) + /* If reading the status failed, assume that it's off. 
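The hunk above is where the new per-board hooks get driven: ->init() runs after the GPIO lookup tables are registered, and ->exit() is remembered for teardown (czc_p10t above is the first board to use ->init()). A reduced sketch of the flow, with an illustrative mirror of the two callbacks rather than the driver's real struct:

```c
struct example_dev_info {
	int (*init)(void);	/* optional one-shot board fixup */
	void (*exit)(void);	/* optional undo, run from cleanup */
};

static void (*example_exit_handler)(void);

static int example_run_board_hooks(const struct example_dev_info *info)
{
	int ret;

	if (info->init) {
		ret = info->init();
		if (ret < 0)
			return ret;	/* caller unwinds what was set up so far */
		example_exit_handler = info->exit;
	}
	return 0;
}

static void example_cleanup(void)
{
	if (example_exit_handler)
		example_exit_handler();
}
```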
*/ + if (_regulator_is_enabled(rdev) <= 0) goto unlock; if (have_full_constraints()) { diff --git a/drivers/regulator/max20086-regulator.c b/drivers/regulator/max20086-regulator.c index fbc56b043071349720bf8f2d2b36589213d322b0..b8bf76c170fead75dd5751c309a613e360b618c2 100644 --- a/drivers/regulator/max20086-regulator.c +++ b/drivers/regulator/max20086-regulator.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -140,7 +141,7 @@ static int max20086_parse_regulators_dt(struct max20086 *chip, bool *boot_on) node = of_get_child_by_name(chip->dev->of_node, "regulators"); if (!node) { dev_err(chip->dev, "regulators node not found\n"); - return PTR_ERR(node); + return -ENODEV; } for (i = 0; i < chip->info->num_outputs; ++i) diff --git a/drivers/remoteproc/Kconfig b/drivers/remoteproc/Kconfig index 3ddd426fc969b6ae4ab84c40acff13493d693e26..1660197866531d79dc31d7cef0b011ae1c4343d9 100644 --- a/drivers/remoteproc/Kconfig +++ b/drivers/remoteproc/Kconfig @@ -180,6 +180,7 @@ config QCOM_Q6V5_ADSP depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n depends on QCOM_SYSMON || QCOM_SYSMON=n depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n + depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n select MFD_SYSCON select QCOM_PIL_INFO select QCOM_MDT_LOADER @@ -199,6 +200,7 @@ config QCOM_Q6V5_MSS depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n depends on QCOM_SYSMON || QCOM_SYSMON=n depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n + depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n select MFD_SYSCON select QCOM_MDT_LOADER select QCOM_PIL_INFO @@ -218,6 +220,7 @@ config QCOM_Q6V5_PAS depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n depends on QCOM_SYSMON || QCOM_SYSMON=n depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n + depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n select MFD_SYSCON select QCOM_PIL_INFO select QCOM_MDT_LOADER @@ -239,6 +242,7 @@ config QCOM_Q6V5_WCSS depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n depends on QCOM_SYSMON || QCOM_SYSMON=n depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n + depends on QCOM_AOSS_QMP || QCOM_AOSS_QMP=n select MFD_SYSCON select QCOM_MDT_LOADER select QCOM_PIL_INFO diff --git a/drivers/remoteproc/qcom_q6v5.c b/drivers/remoteproc/qcom_q6v5.c index eada7e34f3af51d7f5b0da137b8626d146bfc957..442a388f8102893a73171b5a8e42285138016136 100644 --- a/drivers/remoteproc/qcom_q6v5.c +++ b/drivers/remoteproc/qcom_q6v5.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c index d6214cb660262155877b3ba7d7ed56bdc073b976..5663cf799c95306432954e59cf3eb065ee4abd7c 100644 --- a/drivers/rpmsg/rpmsg_char.c +++ b/drivers/rpmsg/rpmsg_char.c @@ -93,7 +93,7 @@ static int rpmsg_eptdev_destroy(struct device *dev, void *data) /* wake up any blocked readers */ wake_up_interruptible(&eptdev->readq); - device_del(&eptdev->dev); + cdev_device_del(&eptdev->cdev, &eptdev->dev); put_device(&eptdev->dev); return 0; @@ -336,7 +336,6 @@ static void rpmsg_eptdev_release_device(struct device *dev) ida_simple_remove(&rpmsg_ept_ida, dev->id); ida_simple_remove(&rpmsg_minor_ida, MINOR(eptdev->dev.devt)); - cdev_del(&eptdev->cdev); kfree(eptdev); } @@ -381,19 +380,13 @@ static int rpmsg_eptdev_create(struct rpmsg_ctrldev *ctrldev, dev->id = ret; dev_set_name(dev, "rpmsg%d", ret); - ret = cdev_add(&eptdev->cdev, dev->devt, 1); + ret = cdev_device_add(&eptdev->cdev, &eptdev->dev); if (ret) goto free_ept_ida; /* We can now rely on the release function for 
cleanup */ dev->release = rpmsg_eptdev_release_device; - ret = device_add(dev); - if (ret) { - dev_err(dev, "device_add failed: %d\n", ret); - put_device(dev); - } - return ret; free_ept_ida: @@ -462,7 +455,6 @@ static void rpmsg_ctrldev_release_device(struct device *dev) ida_simple_remove(&rpmsg_ctrl_ida, dev->id); ida_simple_remove(&rpmsg_minor_ida, MINOR(dev->devt)); - cdev_del(&ctrldev->cdev); kfree(ctrldev); } @@ -497,19 +489,13 @@ static int rpmsg_chrdev_probe(struct rpmsg_device *rpdev) dev->id = ret; dev_set_name(&ctrldev->dev, "rpmsg_ctrl%d", ret); - ret = cdev_add(&ctrldev->cdev, dev->devt, 1); + ret = cdev_device_add(&ctrldev->cdev, &ctrldev->dev); if (ret) goto free_ctrl_ida; /* We can now rely on the release function for cleanup */ dev->release = rpmsg_ctrldev_release_device; - ret = device_add(dev); - if (ret) { - dev_err(&rpdev->dev, "device_add failed: %d\n", ret); - put_device(dev); - } - dev_set_drvdata(&rpdev->dev, ctrldev); return ret; @@ -535,7 +521,7 @@ static void rpmsg_chrdev_remove(struct rpmsg_device *rpdev) if (ret) dev_warn(&rpdev->dev, "failed to nuke endpoints: %d\n", ret); - device_del(&ctrldev->dev); + cdev_device_del(&ctrldev->cdev, &ctrldev->dev); put_device(&ctrldev->dev); } diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index cd938a26b76c2396632edd5196b295dcddc4c7e5..3b1cd0c96a74b85d53f218ec517fc3eb525b0524 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -1180,7 +1180,7 @@ static int io_subchannel_chp_event(struct subchannel *sch, else path_event[chpid] = PE_NONE; } - if (cdev) + if (cdev && cdev->drv && cdev->drv->path_event) cdev->drv->path_event(cdev, path_event); break; } diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index d24cafe02708fa941346510c6fd8aac21f1110e3..511bf8e0a436c2eb40f2601c6a68edf1724ea887 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -521,6 +521,8 @@ static void zfcp_fc_adisc_handler(void *data) goto out; } + /* re-init to undo drop from zfcp_fc_adisc() */ + port->d_id = ntoh24(adisc_resp->adisc_port_id); /* port is good, unblock rport without going through erp */ zfcp_scsi_schedule_rport_register(port); out: @@ -534,6 +536,7 @@ static int zfcp_fc_adisc(struct zfcp_port *port) struct zfcp_fc_req *fc_req; struct zfcp_adapter *adapter = port->adapter; struct Scsi_Host *shost = adapter->scsi_host; + u32 d_id; int ret; fc_req = kmem_cache_zalloc(zfcp_fc_req_cache, GFP_ATOMIC); @@ -558,7 +561,15 @@ static int zfcp_fc_adisc(struct zfcp_port *port) fc_req->u.adisc.req.adisc_cmd = ELS_ADISC; hton24(fc_req->u.adisc.req.adisc_port_id, fc_host_port_id(shost)); - ret = zfcp_fsf_send_els(adapter, port->d_id, &fc_req->ct_els, + d_id = port->d_id; /* remember as destination for send els below */ + /* + * Force fresh GID_PN lookup on next port recovery. + * Must happen after request setup and before sending request, + * to prevent race with port->d_id re-init in zfcp_fc_adisc_handler(). 
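The rpmsg_char hunks above replace the split cdev_add() + device_add() sequence with cdev_device_add(), which ties the two lifetimes together; the matching cdev_device_del() makes the separate cdev_del() in the release callbacks unnecessary. A minimal sketch of the pairing, with illustrative names:

```c
#include <linux/cdev.h>
#include <linux/device.h>

struct example_ep {
	struct device dev;
	struct cdev cdev;
};

static int example_register(struct example_ep *ep)
{
	/* adds the cdev and the device as one unit */
	return cdev_device_add(&ep->cdev, &ep->dev);
}

static void example_unregister(struct example_ep *ep)
{
	cdev_device_del(&ep->cdev, &ep->dev);
	put_device(&ep->dev);	/* final reference drops through ->release() */
}
```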
+ */ + port->d_id = 0; + + ret = zfcp_fsf_send_els(adapter, d_id, &fc_req->ct_els, ZFCP_FC_CTELS_TMO); if (ret) kmem_cache_free(zfcp_fc_req_cache, fc_req); diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c index b9482da79512fcb62d7f09116d4ed6db9e652d56..3ebe66151dcb2f5d7a1caf8e86ce60a3b8ab1099 100644 --- a/drivers/scsi/3w-sas.c +++ b/drivers/scsi/3w-sas.c @@ -1567,8 +1567,6 @@ static int twl_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) pci_try_set_mwi(pdev); retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (retval) - retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (retval) { TW_PRINTK(host, TW_DRIVER, 0x18, "Failed to set dma mask"); retval = -ENODEV; @@ -1786,8 +1784,6 @@ static int __maybe_unused twl_resume(struct device *dev) pci_try_set_mwi(pdev); retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (retval) - retval = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (retval) { TW_PRINTK(host, TW_DRIVER, 0x25, "Failed to set dma mask during resume"); retval = -ENODEV; diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 3ad3ebaca8e99a3dc52876547283649e08d1f987..ad4972c0fc534a1d0cd3e1056d0bea3ac63fcce3 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -1507,7 +1507,6 @@ NCR_700_intr(int irq, void *dev_id) struct scsi_cmnd *SCp = hostdata->cmd; handled = 1; - SCp = hostdata->cmd; if(istat & SCSI_INT_PENDING) { udelay(10); diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index 59f6b7b2a70a139b11bfa52da317f4e2fa49ca4b..b04d039da2767e9af0323bc73c4bd8c26f8ea7b9 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -271,7 +271,7 @@ MODULE_PARM_DESC(msi, "IRQ handling." " 0=PIC(default), 1=MSI, 2=MSI-X)"); module_param(startup_timeout, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(startup_timeout, "The duration of time in seconds to wait for" - " adapter to have it's kernel up and\n" + " adapter to have its kernel up and\n" "running. 
This is typically adjusted for large systems that do not" " have a BIOS."); module_param(aif_timeout, int, S_IRUGO|S_IWUSR); diff --git a/drivers/scsi/aic7xxx/aic79xx_osm.c b/drivers/scsi/aic7xxx/aic79xx_osm.c index 5d566d2b2997c88fba1bcef01599aac2f0678514..928099163f0f6f9fbb40322b723fff7ad13204e0 100644 --- a/drivers/scsi/aic7xxx/aic79xx_osm.c +++ b/drivers/scsi/aic7xxx/aic79xx_osm.c @@ -755,11 +755,7 @@ ahd_linux_biosparam(struct scsi_device *sdev, struct block_device *bdev, static int ahd_linux_abort(struct scsi_cmnd *cmd) { - int error; - - error = ahd_linux_queue_abort_cmd(cmd); - - return error; + return ahd_linux_queue_abort_cmd(cmd); } /* diff --git a/drivers/scsi/bfa/bfad.c b/drivers/scsi/bfa/bfad.c index 440ef32be048fee51aad6c7380d72778b09fe6ed..e5aa982ffedc3405c115b9fa441c87bf079d9a3d 100644 --- a/drivers/scsi/bfa/bfad.c +++ b/drivers/scsi/bfa/bfad.c @@ -732,9 +732,6 @@ bfad_pci_init(struct pci_dev *pdev, struct bfad_s *bfad) pci_set_master(pdev); rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (rc) - rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); - if (rc) { rc = -ENODEV; printk(KERN_ERR "dma_set_mask_and_coherent fail %p\n", pdev); @@ -1559,9 +1556,6 @@ bfad_pci_slot_reset(struct pci_dev *pdev) pci_set_master(pdev); rc = dma_set_mask_and_coherent(&bfad->pcidev->dev, DMA_BIT_MASK(64)); - if (rc) - rc = dma_set_mask_and_coherent(&bfad->pcidev->dev, - DMA_BIT_MASK(32)); if (rc) goto out_disable_device; diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 71fa62bd30830c4d8f982a4c0cf3837f5d895f72..a826456c60759c6ca6b3a0460a1ffac7ecd67a3a 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -82,7 +82,7 @@ static int bnx2fc_bind_pcidev(struct bnx2fc_hba *hba); static void bnx2fc_unbind_pcidev(struct bnx2fc_hba *hba); static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface, struct device *parent, int npiv); -static void bnx2fc_destroy_work(struct work_struct *work); +static void bnx2fc_port_destroy(struct fcoe_port *port); static struct bnx2fc_hba *bnx2fc_hba_lookup(struct net_device *phys_dev); static struct bnx2fc_interface *bnx2fc_interface_lookup(struct net_device @@ -508,7 +508,8 @@ static int bnx2fc_l2_rcv_thread(void *arg) static void bnx2fc_recv_frame(struct sk_buff *skb) { - u32 fr_len; + u64 crc_err; + u32 fr_len, fr_crc; struct fc_lport *lport; struct fcoe_rcv_info *fr; struct fc_stats *stats; @@ -542,6 +543,11 @@ static void bnx2fc_recv_frame(struct sk_buff *skb) skb_pull(skb, sizeof(struct fcoe_hdr)); fr_len = skb->len - sizeof(struct fcoe_crc_eof); + stats = per_cpu_ptr(lport->stats, get_cpu()); + stats->RxFrames++; + stats->RxWords += fr_len / FCOE_WORD_TO_BYTE; + put_cpu(); + fp = (struct fc_frame *)skb; fc_frame_init(fp); fr_dev(fp) = lport; @@ -624,16 +630,15 @@ static void bnx2fc_recv_frame(struct sk_buff *skb) return; } - stats = per_cpu_ptr(lport->stats, smp_processor_id()); - stats->RxFrames++; - stats->RxWords += fr_len / FCOE_WORD_TO_BYTE; + fr_crc = le32_to_cpu(fr_crc(fp)); - if (le32_to_cpu(fr_crc(fp)) != - ~crc32(~0, skb->data, fr_len)) { - if (stats->InvalidCRCCount < 5) + if (unlikely(fr_crc != ~crc32(~0, skb->data, fr_len))) { + stats = per_cpu_ptr(lport->stats, get_cpu()); + crc_err = (stats->InvalidCRCCount++); + put_cpu(); + if (crc_err < 5) printk(KERN_WARNING PFX "dropping frame with " "CRC error\n"); - stats->InvalidCRCCount++; kfree_skb(skb); return; } @@ -907,9 +912,6 @@ static void bnx2fc_indicate_netevent(void *context, 
unsigned long event, __bnx2fc_destroy(interface); } mutex_unlock(&bnx2fc_dev_lock); - - /* Ensure ALL destroy work has been completed before return */ - flush_workqueue(bnx2fc_wq); return; default: @@ -1215,8 +1217,8 @@ static int bnx2fc_vport_destroy(struct fc_vport *vport) mutex_unlock(&n_port->lp_mutex); bnx2fc_free_vport(interface->hba, port->lport); bnx2fc_port_shutdown(port->lport); + bnx2fc_port_destroy(port); bnx2fc_interface_put(interface); - queue_work(bnx2fc_wq, &port->destroy_work); return 0; } @@ -1525,7 +1527,6 @@ static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface, port->lport = lport; port->priv = interface; port->get_netdev = bnx2fc_netdev; - INIT_WORK(&port->destroy_work, bnx2fc_destroy_work); /* Configure fcoe_port */ rc = bnx2fc_lport_config(lport); @@ -1653,8 +1654,8 @@ static void __bnx2fc_destroy(struct bnx2fc_interface *interface) bnx2fc_interface_cleanup(interface); bnx2fc_stop(interface); list_del(&interface->list); + bnx2fc_port_destroy(port); bnx2fc_interface_put(interface); - queue_work(bnx2fc_wq, &port->destroy_work); } /** @@ -1694,15 +1695,12 @@ netdev_err: return rc; } -static void bnx2fc_destroy_work(struct work_struct *work) +static void bnx2fc_port_destroy(struct fcoe_port *port) { - struct fcoe_port *port; struct fc_lport *lport; - port = container_of(work, struct fcoe_port, destroy_work); lport = port->lport; - - BNX2FC_HBA_DBG(lport, "Entered bnx2fc_destroy_work\n"); + BNX2FC_HBA_DBG(lport, "Entered %s, destroying lport %p\n", __func__, lport); bnx2fc_if_destroy(lport); } @@ -2556,9 +2554,6 @@ static void bnx2fc_ulp_exit(struct cnic_dev *dev) __bnx2fc_destroy(interface); mutex_unlock(&bnx2fc_dev_lock); - /* Ensure ALL destroy work has been completed before return */ - flush_workqueue(bnx2fc_wq); - bnx2fc_ulp_stop(hba); /* unregister cnic device */ if (test_and_clear_bit(BNX2FC_CNIC_REGISTERED, &hba->reg_with_cnic)) diff --git a/drivers/scsi/elx/efct/efct_driver.c b/drivers/scsi/elx/efct/efct_driver.c index ae62fc3c9ee369184f4c9d1ca0ff1f8910405ae2..b08fc8839808d2b63f83765e80eb60cc7135406a 100644 --- a/drivers/scsi/elx/efct/efct_driver.c +++ b/drivers/scsi/elx/efct/efct_driver.c @@ -541,13 +541,10 @@ efct_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, efct); - if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) != 0) { - dev_warn(&pdev->dev, "trying DMA_BIT_MASK(32)\n"); - if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)) != 0) { - dev_err(&pdev->dev, "setting DMA_BIT_MASK failed\n"); - rc = -1; - goto dma_mask_out; - } + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (rc) { + dev_err(&pdev->dev, "setting DMA_BIT_MASK failed\n"); + goto dma_mask_out; } num_interrupts = efct_device_interrupts_required(efct); diff --git a/drivers/scsi/elx/libefc/efc_els.c b/drivers/scsi/elx/libefc/efc_els.c index 7bb4f9aad2c808754da6eab6165175b09a203912..84bc81d7ce7651cf43db1a94c56ac759ec57339b 100644 --- a/drivers/scsi/elx/libefc/efc_els.c +++ b/drivers/scsi/elx/libefc/efc_els.c @@ -46,18 +46,14 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen) efc = node->efc; - spin_lock_irqsave(&node->els_ios_lock, flags); - if (!node->els_io_enabled) { efc_log_err(efc, "els io alloc disabled\n"); - spin_unlock_irqrestore(&node->els_ios_lock, flags); return NULL; } els = mempool_alloc(efc->els_io_pool, GFP_ATOMIC); if (!els) { atomic_add_return(1, &efc->els_io_alloc_failed_count); - spin_unlock_irqrestore(&node->els_ios_lock, flags); return NULL; } @@ -74,7 +70,6 
@@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen) &els->io.req.phys, GFP_KERNEL); if (!els->io.req.virt) { mempool_free(els, efc->els_io_pool); - spin_unlock_irqrestore(&node->els_ios_lock, flags); return NULL; } @@ -94,10 +89,11 @@ efc_els_io_alloc_size(struct efc_node *node, u32 reqlen, u32 rsplen) /* add els structure to ELS IO list */ INIT_LIST_HEAD(&els->list_entry); + spin_lock_irqsave(&node->els_ios_lock, flags); list_add_tail(&els->list_entry, &node->els_ios_list); + spin_unlock_irqrestore(&node->els_ios_lock, flags); } - spin_unlock_irqrestore(&node->els_ios_lock, flags); return els; } diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index f46f679fe825803dfa67c634f7b27a7d8670a6f3..ebf5ec38891bb5508fb10d06a3b8c5cfa0efaf35 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -400,8 +400,7 @@ void hisi_sas_task_deliver(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot, struct hisi_sas_dq *dq, struct hisi_sas_device *sas_dev, - struct hisi_sas_internal_abort *abort, - struct hisi_sas_tmf_task *tmf) + struct hisi_sas_internal_abort *abort) { struct hisi_sas_cmd_hdr *cmd_hdr_base; int dlvry_queue_slot, dlvry_queue; @@ -427,8 +426,6 @@ void hisi_sas_task_deliver(struct hisi_hba *hisi_hba, cmd_hdr_base = hisi_hba->cmd_hdr[dlvry_queue]; slot->cmd_hdr = &cmd_hdr_base[dlvry_queue_slot]; - slot->tmf = tmf; - slot->is_internal = tmf; task->lldd_task = slot; memset(slot->cmd_hdr, 0, sizeof(struct hisi_sas_cmd_hdr)); @@ -587,7 +584,7 @@ static int hisi_sas_task_exec(struct sas_task *task, gfp_t gfp_flags, slot->is_internal = tmf; /* protect task_prep and start_delivery sequence */ - hisi_sas_task_deliver(hisi_hba, slot, dq, sas_dev, NULL, tmf); + hisi_sas_task_deliver(hisi_hba, slot, dq, sas_dev, NULL); return 0; @@ -1380,12 +1377,13 @@ static int hisi_sas_softreset_ata_disk(struct domain_device *device) struct hisi_hba *hisi_hba = dev_to_hisi_hba(device); struct device *dev = hisi_hba->dev; int s = sizeof(struct host_to_dev_fis); + struct hisi_sas_tmf_task tmf = {}; ata_for_each_link(link, ap, EDGE) { int pmp = sata_srst_pmp(link); hisi_sas_fill_ata_reset_cmd(link->device, 1, pmp, fis); - rc = hisi_sas_exec_internal_tmf_task(device, fis, s, NULL); + rc = hisi_sas_exec_internal_tmf_task(device, fis, s, &tmf); if (rc != TMF_RESP_FUNC_COMPLETE) break; } @@ -1396,7 +1394,7 @@ static int hisi_sas_softreset_ata_disk(struct domain_device *device) hisi_sas_fill_ata_reset_cmd(link->device, 0, pmp, fis); rc = hisi_sas_exec_internal_tmf_task(device, fis, - s, NULL); + s, &tmf); if (rc != TMF_RESP_FUNC_COMPLETE) dev_err(dev, "ata disk %016llx de-reset failed\n", SAS_ADDR(device->sas_addr)); @@ -1525,16 +1523,11 @@ static void hisi_sas_send_ata_reset_each_phy(struct hisi_hba *hisi_hba, struct device *dev = hisi_hba->dev; int s = sizeof(struct host_to_dev_fis); int rc = TMF_RESP_FUNC_FAILED; - struct asd_sas_phy *sas_phy; struct ata_link *link; u8 fis[20] = {0}; - u32 state; int i; - state = hisi_hba->hw->get_phys_state(hisi_hba); for (i = 0; i < hisi_hba->n_phy; i++) { - if (!(state & BIT(sas_phy->id))) - continue; if (!(sas_port->phy_mask & BIT(i))) continue; @@ -2072,7 +2065,7 @@ hisi_sas_internal_abort_task_exec(struct hisi_hba *hisi_hba, int device_id, slot->port = port; slot->is_internal = true; - hisi_sas_task_deliver(hisi_hba, slot, dq, sas_dev, abort, NULL); + hisi_sas_task_deliver(hisi_hba, slot, dq, sas_dev, abort); return 0; @@ -2671,9 +2664,6 @@ static struct Scsi_Host 
*hisi_sas_shost_alloc(struct platform_device *pdev, goto err_out; error = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); - if (error) - error = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); - if (error) { dev_err(dev, "No usable DMA addressing method\n"); goto err_out; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index a45ef9a5e12ee09c8f5b431591f36b91f95767c3..a01a3a7b706b5221f6b0f271835dab9e18c6bee0 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -4695,8 +4695,6 @@ hisi_sas_v3_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_out; rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (rc) - rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); if (rc) { dev_err(dev, "No usable DMA addressing method\n"); rc = -ENODEV; diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 4878c94761f91b147614b87bc333c770010e5f27..a1e0a106c132bd4d2ba64bd60e380256826cf716 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -1161,6 +1161,16 @@ struct lpfc_hba { uint32_t cfg_hostmem_hgp; uint32_t cfg_log_verbose; uint32_t cfg_enable_fc4_type; +#define LPFC_ENABLE_FCP 1 +#define LPFC_ENABLE_NVME 2 +#define LPFC_ENABLE_BOTH 3 +#if (IS_ENABLED(CONFIG_NVME_FC)) +#define LPFC_MAX_ENBL_FC4_TYPE LPFC_ENABLE_BOTH +#define LPFC_DEF_ENBL_FC4_TYPE LPFC_ENABLE_BOTH +#else +#define LPFC_MAX_ENBL_FC4_TYPE LPFC_ENABLE_FCP +#define LPFC_DEF_ENBL_FC4_TYPE LPFC_ENABLE_FCP +#endif uint32_t cfg_aer_support; uint32_t cfg_sriov_nr_virtfn; uint32_t cfg_request_firmware_upgrade; @@ -1182,9 +1192,6 @@ struct lpfc_hba { uint32_t cfg_ras_fwlog_func; uint32_t cfg_enable_bbcr; /* Enable BB Credit Recovery */ uint32_t cfg_enable_dpp; /* Enable Direct Packet Push */ -#define LPFC_ENABLE_FCP 1 -#define LPFC_ENABLE_NVME 2 -#define LPFC_ENABLE_BOTH 3 uint32_t cfg_enable_pbde; uint32_t cfg_enable_mi; struct nvmet_fc_target_port *targetport; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 7a7f17d71811b5956683cd34338c72b55d0456f0..bac78fbce8d6eecd7bbba770efb0a7840ed6fcb8 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -3978,8 +3978,8 @@ LPFC_ATTR_R(nvmet_mrq_post, * 3 - register both FCP and NVME * Supported values are [1,3]. 
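The recurring dma_set_mask_and_coherent() cleanups in this series (3w-sas, bfad, efct and the hisi_sas hunks above) all follow the same rationale: if a 64-bit mask is rejected, a 32-bit mask would be rejected for the same reason, so the old fallback branch was dead code. The simplified probe-time shape, sketched with illustrative names:

```c
#include <linux/dma-mapping.h>
#include <linux/pci.h>

static int example_setup_dma(struct pci_dev *pdev)
{
	int rc;

	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
	if (rc) {
		/* no 32-bit retry: it cannot succeed where 64-bit failed */
		dev_err(&pdev->dev, "no usable DMA addressing method\n");
		return -ENODEV;
	}
	return 0;
}
```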
Default value is 3 */ -LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH, - LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH, +LPFC_ATTR_R(enable_fc4_type, LPFC_DEF_ENBL_FC4_TYPE, + LPFC_ENABLE_FCP, LPFC_MAX_ENBL_FC4_TYPE, "Enable FC4 Protocol support - FCP / NVME"); /* diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index a56f01f659f8ea3cfac9d2970fbfb1726d3e0eca..558f7d2559c4d4e64dbbb5e919ac1ece2797c914 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -2104,7 +2104,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba) } if (reg_err1 == SLIPORT_ERR1_REG_ERR_CODE_2 && reg_err2 == SLIPORT_ERR2_REG_FW_RESTART) { - lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "3143 Port Down: Firmware Update " "Detected\n"); en_rn_msg = false; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index cd26c0f8c28148d4b7cd4b003c406484a53ef15b..430abebf99f1513b3ae49ebb70db5d92842a62f3 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -13363,6 +13363,7 @@ lpfc_sli4_eratt_read(struct lpfc_hba *phba) uint32_t uerr_sta_hi, uerr_sta_lo; uint32_t if_type, portsmphr; struct lpfc_register portstat_reg; + u32 logmask; /* * For now, use the SLI4 device internal unrecoverable error @@ -13413,7 +13414,12 @@ lpfc_sli4_eratt_read(struct lpfc_hba *phba) readl(phba->sli4_hba.u.if_type2.ERR1regaddr); phba->work_status[1] = readl(phba->sli4_hba.u.if_type2.ERR2regaddr); - lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, + logmask = LOG_TRACE_EVENT; + if (phba->work_status[0] == + SLIPORT_ERR1_REG_ERR_CODE_2 && + phba->work_status[1] == SLIPORT_ERR2_REG_FW_RESTART) + logmask = LOG_SLI; + lpfc_printf_log(phba, KERN_ERR, logmask, "2885 Port Status Event: " "port status reg 0x%x, " "port smphr reg 0x%x, " @@ -17982,8 +17988,8 @@ lpfc_sli4_alloc_xri(struct lpfc_hba *phba) * the driver starts at 0 each time. */ spin_lock_irq(&phba->hbalock); - xri = find_next_zero_bit(phba->sli4_hba.xri_bmask, - phba->sli4_hba.max_cfg_param.max_xri, 0); + xri = find_first_zero_bit(phba->sli4_hba.xri_bmask, + phba->sli4_hba.max_cfg_param.max_xri); if (xri >= phba->sli4_hba.max_cfg_param.max_xri) { spin_unlock_irq(&phba->hbalock); return NO_XRI; @@ -19660,7 +19666,7 @@ lpfc_sli4_alloc_rpi(struct lpfc_hba *phba) max_rpi = phba->sli4_hba.max_cfg_param.max_rpi; rpi_limit = phba->sli4_hba.next_rpi; - rpi = find_next_zero_bit(phba->sli4_hba.rpi_bmask, rpi_limit, 0); + rpi = find_first_zero_bit(phba->sli4_hba.rpi_bmask, rpi_limit); if (rpi >= rpi_limit) rpi = LPFC_RPI_ALLOC_ERROR; else { @@ -20303,8 +20309,8 @@ next_priority: * have been tested so that we can detect when we should * change the priority level. 
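The lpfc hunks above swap find_next_zero_bit(map, size, 0) and find_next_bit(map, size, 0) for their find_first_*() counterparts: a search starting at offset 0 is exactly what the first-bit helpers do, without the extra offset handling. A small sketch of the allocation idiom, with illustrative names:

```c
#include <linux/bitops.h>
#include <linux/errno.h>

static int example_alloc_id(unsigned long *bmask, unsigned int max)
{
	unsigned int id;

	/* was: find_next_zero_bit(bmask, max, 0) */
	id = find_first_zero_bit(bmask, max);
	if (id >= max)
		return -ENOSPC;

	set_bit(id, bmask);
	return id;
}
```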
*/ - next_fcf_index = find_next_bit(phba->fcf.fcf_rr_bmask, - LPFC_SLI4_FCF_TBL_INDX_MAX, 0); + next_fcf_index = find_first_bit(phba->fcf.fcf_rr_bmask, + LPFC_SLI4_FCF_TBL_INDX_MAX); } diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 0d31d7a5e3352fc8e0135cc097dab7a910bfb1ed..bf987f3a7f3f2c71c3315531e22ca93f77afc63b 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -192,23 +192,21 @@ mega_query_adapter(adapter_t *adapter) { dma_addr_t prod_info_dma_handle; mega_inquiry3 *inquiry3; - u8 raw_mbox[sizeof(struct mbox_out)]; - mbox_t *mbox; + struct mbox_out mbox; + u8 *raw_mbox = (u8 *)&mbox; int retval; /* Initialize adapter inquiry mailbox */ - mbox = (mbox_t *)raw_mbox; - memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE); - memset(&mbox->m_out, 0, sizeof(raw_mbox)); + memset(&mbox, 0, sizeof(mbox)); /* * Try to issue Inquiry3 command * if not succeeded, then issue MEGA_MBOXCMD_ADAPTERINQ command and * update enquiry3 structure */ - mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle; + mbox.xferaddr = (u32)adapter->buf_dma_handle; inquiry3 = (mega_inquiry3 *)adapter->mega_buffer; @@ -232,10 +230,10 @@ mega_query_adapter(adapter_t *adapter) inq = &ext_inq->raid_inq; - mbox->m_out.xferaddr = (u32)dma_handle; + mbox.xferaddr = (u32)dma_handle; /*issue old 0x04 command to adapter */ - mbox->m_out.cmd = MEGA_MBOXCMD_ADPEXTINQ; + mbox.cmd = MEGA_MBOXCMD_ADPEXTINQ; issue_scb_block(adapter, raw_mbox); @@ -262,7 +260,7 @@ mega_query_adapter(adapter_t *adapter) sizeof(mega_product_info), DMA_FROM_DEVICE); - mbox->m_out.xferaddr = prod_info_dma_handle; + mbox.xferaddr = prod_info_dma_handle; raw_mbox[0] = FC_NEW_CONFIG; /* i.e. mbox->cmd=0xA1 */ raw_mbox[2] = NC_SUBOP_PRODUCT_INFO; /* i.e. 0x0E */ @@ -3569,16 +3567,14 @@ mega_n_to_m(void __user *arg, megacmd_t *mc) static int mega_is_bios_enabled(adapter_t *adapter) { - unsigned char raw_mbox[sizeof(struct mbox_out)]; - mbox_t *mbox; - - mbox = (mbox_t *)raw_mbox; + struct mbox_out mbox; + unsigned char *raw_mbox = (u8 *)&mbox; - memset(&mbox->m_out, 0, sizeof(raw_mbox)); + memset(&mbox, 0, sizeof(mbox)); memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE); - mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle; + mbox.xferaddr = (u32)adapter->buf_dma_handle; raw_mbox[0] = IS_BIOS_ENABLED; raw_mbox[2] = GET_BIOS; @@ -3600,13 +3596,11 @@ mega_is_bios_enabled(adapter_t *adapter) static void mega_enum_raid_scsi(adapter_t *adapter) { - unsigned char raw_mbox[sizeof(struct mbox_out)]; - mbox_t *mbox; + struct mbox_out mbox; + unsigned char *raw_mbox = (u8 *)&mbox; int i; - mbox = (mbox_t *)raw_mbox; - - memset(&mbox->m_out, 0, sizeof(raw_mbox)); + memset(&mbox, 0, sizeof(mbox)); /* * issue command to find out what channels are raid/scsi @@ -3616,7 +3610,7 @@ mega_enum_raid_scsi(adapter_t *adapter) memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE); - mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle; + mbox.xferaddr = (u32)adapter->buf_dma_handle; /* * Non-ROMB firmware fail this command, so all channels @@ -3655,23 +3649,21 @@ static void mega_get_boot_drv(adapter_t *adapter) { struct private_bios_data *prv_bios_data; - unsigned char raw_mbox[sizeof(struct mbox_out)]; - mbox_t *mbox; + struct mbox_out mbox; + unsigned char *raw_mbox = (u8 *)&mbox; u16 cksum = 0; u8 *cksum_p; u8 boot_pdrv; int i; - mbox = (mbox_t *)raw_mbox; - - memset(&mbox->m_out, 0, sizeof(raw_mbox)); + memset(&mbox, 0, sizeof(mbox)); raw_mbox[0] = BIOS_PVT_DATA; raw_mbox[2] = GET_BIOS_PVT_DATA; memset((void 
*)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE); - mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle; + mbox.xferaddr = (u32)adapter->buf_dma_handle; adapter->boot_ldrv_enabled = 0; adapter->boot_ldrv = 0; @@ -3721,13 +3713,11 @@ mega_get_boot_drv(adapter_t *adapter) static int mega_support_random_del(adapter_t *adapter) { - unsigned char raw_mbox[sizeof(struct mbox_out)]; - mbox_t *mbox; + struct mbox_out mbox; + unsigned char *raw_mbox = (u8 *)&mbox; int rval; - mbox = (mbox_t *)raw_mbox; - - memset(&mbox->m_out, 0, sizeof(raw_mbox)); + memset(&mbox, 0, sizeof(mbox)); /* * issue command @@ -3750,13 +3740,11 @@ mega_support_random_del(adapter_t *adapter) static int mega_support_ext_cdb(adapter_t *adapter) { - unsigned char raw_mbox[sizeof(struct mbox_out)]; - mbox_t *mbox; + struct mbox_out mbox; + unsigned char *raw_mbox = (u8 *)&mbox; int rval; - mbox = (mbox_t *)raw_mbox; - - memset(&mbox->m_out, 0, sizeof(raw_mbox)); + memset(&mbox, 0, sizeof(mbox)); /* * issue command to find out if controller supports extended CDBs. */ @@ -3865,16 +3853,14 @@ mega_do_del_logdrv(adapter_t *adapter, int logdrv) static void mega_get_max_sgl(adapter_t *adapter) { - unsigned char raw_mbox[sizeof(struct mbox_out)]; - mbox_t *mbox; + struct mbox_out mbox; + unsigned char *raw_mbox = (u8 *)&mbox; - mbox = (mbox_t *)raw_mbox; - - memset(mbox, 0, sizeof(raw_mbox)); + memset(&mbox, 0, sizeof(mbox)); memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE); - mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle; + mbox.xferaddr = (u32)adapter->buf_dma_handle; raw_mbox[0] = MAIN_MISC_OPCODE; raw_mbox[2] = GET_MAX_SG_SUPPORT; @@ -3888,7 +3874,7 @@ mega_get_max_sgl(adapter_t *adapter) } else { adapter->sglen = *((char *)adapter->mega_buffer); - + /* * Make sure this is not more than the resources we are * planning to allocate @@ -3910,16 +3896,14 @@ mega_get_max_sgl(adapter_t *adapter) static int mega_support_cluster(adapter_t *adapter) { - unsigned char raw_mbox[sizeof(struct mbox_out)]; - mbox_t *mbox; - - mbox = (mbox_t *)raw_mbox; + struct mbox_out mbox; + unsigned char *raw_mbox = (u8 *)&mbox; - memset(mbox, 0, sizeof(raw_mbox)); + memset(&mbox, 0, sizeof(mbox)); memset((void *)adapter->mega_buffer, 0, MEGA_BUFFER_SIZE); - mbox->m_out.xferaddr = (u32)adapter->buf_dma_handle; + mbox.xferaddr = (u32)adapter->buf_dma_handle; /* * Try to get the initiator id. This command will succeed iff the diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c index c39dd4978c9d1081a582348b8d57237189ab440a..15bdc21ead669354a798bdf54dcf5b92a2093d43 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_fw.c +++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c @@ -901,7 +901,7 @@ static const struct { }, { MPI3MR_RESET_FROM_SYSFS, "sysfs invocation" }, { MPI3MR_RESET_FROM_SYSFS_TIMEOUT, "sysfs TM timeout" }, - { MPI3MR_RESET_FROM_FIRMWARE, "firmware asynchronus reset" }, + { MPI3MR_RESET_FROM_FIRMWARE, "firmware asynchronous reset" }, }; /** @@ -1242,7 +1242,7 @@ static int mpi3mr_bring_ioc_ready(struct mpi3mr_ioc *mrioc) ioc_state = mpi3mr_get_iocstate(mrioc); if (ioc_state == MRIOC_STATE_READY) { ioc_info(mrioc, - "successfully transistioned to %s state\n", + "successfully transitioned to %s state\n", mpi3mr_iocstate_name(ioc_state)); return 0; } @@ -2174,7 +2174,7 @@ out: * mpi3mr_check_rh_fault_ioc - check reset history and fault * controller * @mrioc: Adapter instance reference - * @reason_code, reason code for the fault. + * @reason_code: reason code for the fault. 
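The megaraid rework running through the hunks above replaces a byte array sized for struct mbox_out that was then written through a pointer to the larger mbox_t, an out-of-bounds access on the stack, with a properly typed struct mbox_out plus a byte-wise alias for the opcode fields. A sketch of the fixed shape; the structure layout here is illustrative, not the driver's:

```c
#include <linux/string.h>
#include <linux/types.h>

struct example_mbox_out {
	u8  cmd;
	u8  cmdid;
	u16 numsectors;
	u32 xferaddr;
};

static void example_fill_mbox(struct example_mbox_out *mbox, u32 dma_addr)
{
	u8 *raw_mbox = (u8 *)mbox;	/* byte view of the same object */

	memset(mbox, 0, sizeof(*mbox));	/* bounded by the real type now */
	mbox->xferaddr = dma_addr;
	raw_mbox[0] = 0xA1;		/* opcode byte, as the driver sets it */
}
```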
* * This routine will save snapdump and fault the controller with * the given reason code if it is not already in the fault or @@ -3633,7 +3633,6 @@ static int mpi3mr_enable_events(struct mpi3mr_ioc *mrioc) /** * mpi3mr_init_ioc - Initialize the controller * @mrioc: Adapter instance reference - * @init_type: Flag to indicate is the init_type * * This the controller initialization routine, executed either * after soft reset or from pci probe callback. @@ -3844,7 +3843,7 @@ retry_init: if (mrioc->shost->nr_hw_queues > mrioc->num_op_reply_q) { ioc_err(mrioc, - "cannot create minimum number of operatioanl queues expected:%d created:%d\n", + "cannot create minimum number of operational queues expected:%d created:%d\n", mrioc->shost->nr_hw_queues, mrioc->num_op_reply_q); goto out_failed_noretry; } @@ -4174,7 +4173,7 @@ static void mpi3mr_issue_ioc_shutdown(struct mpi3mr_ioc *mrioc) /** * mpi3mr_cleanup_ioc - Cleanup controller * @mrioc: Adapter instance reference - + * * controller cleanup handler, Message unit reset or soft reset * and shutdown notification is issued to the controller. * diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index a0af986633d2a016a2ccdb1b4d83a5dbe4f631b4..949e98d523e2836425286373cdb19c61c8e16dff 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -77,8 +77,8 @@ #define MPT3SAS_DRIVER_NAME "mpt3sas" #define MPT3SAS_AUTHOR "Avago Technologies " #define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver" -#define MPT3SAS_DRIVER_VERSION "39.100.00.00" -#define MPT3SAS_MAJOR_VERSION 39 +#define MPT3SAS_DRIVER_VERSION "40.100.00.00" +#define MPT3SAS_MAJOR_VERSION 40 #define MPT3SAS_MINOR_VERSION 100 #define MPT3SAS_BUILD_VERSION 0 #define MPT3SAS_RELEASE_VERSION 00 diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index 05b6c6a073c34b42a698efa0e64e8b14a32c567b..d92ca140d298c221292dbcca60e1245ae27c2db4 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -3533,11 +3533,31 @@ diag_trigger_master_store(struct device *cdev, { struct Scsi_Host *shost = class_to_shost(cdev); struct MPT3SAS_ADAPTER *ioc = shost_priv(shost); + struct SL_WH_MASTER_TRIGGER_T *master_tg; unsigned long flags; ssize_t rc; + bool set = 1; - spin_lock_irqsave(&ioc->diag_trigger_lock, flags); rc = min(sizeof(struct SL_WH_MASTER_TRIGGER_T), count); + + if (ioc->supports_trigger_pages) { + master_tg = kzalloc(sizeof(struct SL_WH_MASTER_TRIGGER_T), + GFP_KERNEL); + if (!master_tg) + return -ENOMEM; + + memcpy(master_tg, buf, rc); + if (!master_tg->MasterData) + set = 0; + if (mpt3sas_config_update_driver_trigger_pg1(ioc, master_tg, + set)) { + kfree(master_tg); + return -EFAULT; + } + kfree(master_tg); + } + + spin_lock_irqsave(&ioc->diag_trigger_lock, flags); memset(&ioc->diag_trigger_master, 0, sizeof(struct SL_WH_MASTER_TRIGGER_T)); memcpy(&ioc->diag_trigger_master, buf, rc); @@ -3589,11 +3609,31 @@ diag_trigger_event_store(struct device *cdev, { struct Scsi_Host *shost = class_to_shost(cdev); struct MPT3SAS_ADAPTER *ioc = shost_priv(shost); + struct SL_WH_EVENT_TRIGGERS_T *event_tg; unsigned long flags; ssize_t sz; + bool set = 1; - spin_lock_irqsave(&ioc->diag_trigger_lock, flags); sz = min(sizeof(struct SL_WH_EVENT_TRIGGERS_T), count); + if (ioc->supports_trigger_pages) { + event_tg = kzalloc(sizeof(struct SL_WH_EVENT_TRIGGERS_T), + GFP_KERNEL); + if (!event_tg) + return -ENOMEM; + + memcpy(event_tg, buf, sz); + if (!event_tg->ValidEntries) + 
set = 0; + if (mpt3sas_config_update_driver_trigger_pg2(ioc, event_tg, + set)) { + kfree(event_tg); + return -EFAULT; + } + kfree(event_tg); + } + + spin_lock_irqsave(&ioc->diag_trigger_lock, flags); + memset(&ioc->diag_trigger_event, 0, sizeof(struct SL_WH_EVENT_TRIGGERS_T)); memcpy(&ioc->diag_trigger_event, buf, sz); @@ -3644,11 +3684,31 @@ diag_trigger_scsi_store(struct device *cdev, { struct Scsi_Host *shost = class_to_shost(cdev); struct MPT3SAS_ADAPTER *ioc = shost_priv(shost); + struct SL_WH_SCSI_TRIGGERS_T *scsi_tg; unsigned long flags; ssize_t sz; + bool set = 1; + + sz = min(sizeof(struct SL_WH_SCSI_TRIGGERS_T), count); + if (ioc->supports_trigger_pages) { + scsi_tg = kzalloc(sizeof(struct SL_WH_SCSI_TRIGGERS_T), + GFP_KERNEL); + if (!scsi_tg) + return -ENOMEM; + + memcpy(scsi_tg, buf, sz); + if (!scsi_tg->ValidEntries) + set = 0; + if (mpt3sas_config_update_driver_trigger_pg3(ioc, scsi_tg, + set)) { + kfree(scsi_tg); + return -EFAULT; + } + kfree(scsi_tg); + } spin_lock_irqsave(&ioc->diag_trigger_lock, flags); - sz = min(sizeof(ioc->diag_trigger_scsi), count); + memset(&ioc->diag_trigger_scsi, 0, sizeof(ioc->diag_trigger_scsi)); memcpy(&ioc->diag_trigger_scsi, buf, sz); if (ioc->diag_trigger_scsi.ValidEntries > NUM_VALID_ENTRIES) @@ -3698,11 +3758,30 @@ diag_trigger_mpi_store(struct device *cdev, { struct Scsi_Host *shost = class_to_shost(cdev); struct MPT3SAS_ADAPTER *ioc = shost_priv(shost); + struct SL_WH_MPI_TRIGGERS_T *mpi_tg; unsigned long flags; ssize_t sz; + bool set = 1; - spin_lock_irqsave(&ioc->diag_trigger_lock, flags); sz = min(sizeof(struct SL_WH_MPI_TRIGGERS_T), count); + if (ioc->supports_trigger_pages) { + mpi_tg = kzalloc(sizeof(struct SL_WH_MPI_TRIGGERS_T), + GFP_KERNEL); + if (!mpi_tg) + return -ENOMEM; + + memcpy(mpi_tg, buf, sz); + if (!mpi_tg->ValidEntries) + set = 0; + if (mpt3sas_config_update_driver_trigger_pg4(ioc, mpi_tg, + set)) { + kfree(mpi_tg); + return -EFAULT; + } + kfree(mpi_tg); + } + + spin_lock_irqsave(&ioc->diag_trigger_lock, flags); memset(&ioc->diag_trigger_mpi, 0, sizeof(ioc->diag_trigger_mpi)); memcpy(&ioc->diag_trigger_mpi, buf, sz); diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index 253ceca54a84df1befce9691a928b707cfff510f..7eb8c39da366316877bbb3820eff7488dbec24d1 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -2267,7 +2267,8 @@ static void myrs_cleanup(struct myrs_hba *cs) myrs_unmap(cs); if (cs->mmio_base) { - cs->disable_intr(cs); + if (cs->disable_intr) + cs->disable_intr(cs); iounmap(cs->mmio_base); cs->mmio_base = NULL; } diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c index 8b9e889bc3067e2fbacfc78f201db70efe17c1fd..92c818a8a84a127abbe24da12d6924a8be7d9bac 100644 --- a/drivers/scsi/pcmcia/nsp_cs.c +++ b/drivers/scsi/pcmcia/nsp_cs.c @@ -1557,6 +1557,9 @@ static int nsp_cs_config_check(struct pcmcia_device *p_dev, void *priv_data) data->MmioAddress = (unsigned long) ioremap(p_dev->resource[2]->start, resource_size(p_dev->resource[2])); + if (!data->MmioAddress) + goto next_entry; + data->MmioLength = resource_size(p_dev->resource[2]); } /* If we got this far, we're cool! 
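The nsp_cs hunk above adds the missing NULL check after ioremap(); without it, a failed mapping would be dereferenced later through MmioAddress. The pattern in isolation, with an illustrative card structure:

```c
#include <linux/errno.h>
#include <linux/io.h>
#include <linux/types.h>

struct example_card {
	void __iomem *mmio;
	resource_size_t mmio_len;
};

static int example_map_mmio(struct example_card *card,
			    resource_size_t start, resource_size_t len)
{
	card->mmio = ioremap(start, len);
	if (!card->mmio)
		return -ENOMEM;	/* bail out instead of storing a NULL mapping */

	card->mmio_len = len;
	return 0;
}
```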
*/ diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index c814e5071712293ed8bce0b4445727c25d671f71..9ec310b795c3375385e6b3f756289fe6115b8d1c 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -2692,7 +2692,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb) u32 tag = le32_to_cpu(psataPayload->tag); u32 port_id = le32_to_cpu(psataPayload->port_id); u32 dev_id = le32_to_cpu(psataPayload->device_id); - unsigned long flags; if (event) pm8001_dbg(pm8001_ha, FAIL, "SATA EVENT 0x%x\n", event); @@ -2724,8 +2723,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb) ts->resp = SAS_TASK_COMPLETE; ts->stat = SAS_DATA_OVERRUN; ts->residual = 0; - if (pm8001_dev) - atomic_dec(&pm8001_dev->running_req); break; case IO_XFER_ERROR_BREAK: pm8001_dbg(pm8001_ha, IO, "IO_XFER_ERROR_BREAK\n"); @@ -2767,7 +2764,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb) IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS); ts->resp = SAS_TASK_COMPLETE; ts->stat = SAS_QUEUE_FULL; - pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag); return; } break; @@ -2853,20 +2849,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb) ts->stat = SAS_OPEN_TO; break; } - spin_lock_irqsave(&t->task_state_lock, flags); - t->task_state_flags &= ~SAS_TASK_STATE_PENDING; - t->task_state_flags &= ~SAS_TASK_AT_INITIATOR; - t->task_state_flags |= SAS_TASK_STATE_DONE; - if (unlikely((t->task_state_flags & SAS_TASK_STATE_ABORTED))) { - spin_unlock_irqrestore(&t->task_state_lock, flags); - pm8001_dbg(pm8001_ha, FAIL, - "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n", - t, event, ts->resp, ts->stat); - pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); - } else { - spin_unlock_irqrestore(&t->task_state_lock, flags); - pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag); - } } /*See the comments for mpi_ssp_completion */ diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index c9a16eef38c10c72d7c6555fb7a84a43a58255df..32edda3e55c6cd2b652355f463ef50d9fd64f242 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -769,8 +769,13 @@ static int pm8001_exec_internal_tmf_task(struct domain_device *dev, res = -TMF_RESP_FUNC_FAILED; /* Even TMF timed out, return direct. */ if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { + struct pm8001_ccb_info *ccb = task->lldd_task; + pm8001_dbg(pm8001_ha, FAIL, "TMF task[%x]timeout.\n", tmf->tmf); + + if (ccb) + ccb->task = NULL; goto ex_err; } @@ -1199,7 +1204,7 @@ int pm8001_abort_task(struct sas_task *task) struct pm8001_device *pm8001_dev; struct pm8001_tmf_task tmf_task; int rc = TMF_RESP_FUNC_FAILED, ret; - u32 phy_id; + u32 phy_id, port_id; struct sas_task_slow slow_task; if (unlikely(!task || !task->lldd_task || !task->dev)) @@ -1246,6 +1251,7 @@ int pm8001_abort_task(struct sas_task *task) DECLARE_COMPLETION_ONSTACK(completion_reset); DECLARE_COMPLETION_ONSTACK(completion); struct pm8001_phy *phy = pm8001_ha->phy + phy_id; + port_id = phy->port->port_id; /* 1. 
Set Device state as Recovery */ pm8001_dev->setds_completion = &completion; @@ -1297,6 +1303,10 @@ int pm8001_abort_task(struct sas_task *task) PORT_RESET_TMO); if (phy->port_reset_status == PORT_RESET_TMO) { pm8001_dev_gone_notify(dev); + PM8001_CHIP_DISP->hw_event_ack_req( + pm8001_ha, 0, + 0x07, /*HW_EVENT_PHY_DOWN ack*/ + port_id, phy_id, 0, 0); goto out; } } diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h index 83eec16d021d2c892f18c8c2a0389420b3a91926..a17da1cebce17f68ec551a066f8149e108be1d62 100644 --- a/drivers/scsi/pm8001/pm8001_sas.h +++ b/drivers/scsi/pm8001/pm8001_sas.h @@ -216,6 +216,9 @@ struct pm8001_dispatch { u32 state); int (*sas_re_init_req)(struct pm8001_hba_info *pm8001_ha); int (*fatal_errors)(struct pm8001_hba_info *pm8001_ha); + void (*hw_event_ack_req)(struct pm8001_hba_info *pm8001_ha, + u32 Qnum, u32 SEA, u32 port_id, u32 phyId, u32 param0, + u32 param1); }; struct pm8001_chip_info { diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index ad3c6da127157b431fc7df97c7e37291a96a5bd5..9d20f8009b89fca081401bb98123d1828b338c6d 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -2185,9 +2185,9 @@ mpi_ssp_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) pm8001_dbg(pm8001_ha, FAIL, "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n", t, status, ts->resp, ts->stat); + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); if (t->slow_task) complete(&t->slow_task->completion); - pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); } else { spin_unlock_irqrestore(&t->task_state_lock, flags); pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); @@ -2794,9 +2794,9 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, pm8001_dbg(pm8001_ha, FAIL, "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n", t, status, ts->resp, ts->stat); + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); if (t->slow_task) complete(&t->slow_task->completion); - pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); } else { spin_unlock_irqrestore(&t->task_state_lock, flags); spin_unlock_irqrestore(&circularQ->oq_lock, @@ -2821,7 +2821,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, u32 tag = le32_to_cpu(psataPayload->tag); u32 port_id = le32_to_cpu(psataPayload->port_id); u32 dev_id = le32_to_cpu(psataPayload->device_id); - unsigned long flags; if (event) pm8001_dbg(pm8001_ha, FAIL, "SATA EVENT 0x%x\n", event); @@ -2854,8 +2853,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, ts->resp = SAS_TASK_COMPLETE; ts->stat = SAS_DATA_OVERRUN; ts->residual = 0; - if (pm8001_dev) - atomic_dec(&pm8001_dev->running_req); break; case IO_XFER_ERROR_BREAK: pm8001_dbg(pm8001_ha, IO, "IO_XFER_ERROR_BREAK\n"); @@ -2904,11 +2901,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, IO_OPEN_CNX_ERROR_IT_NEXUS_LOSS); ts->resp = SAS_TASK_COMPLETE; ts->stat = SAS_QUEUE_FULL; - spin_unlock_irqrestore(&circularQ->oq_lock, - circularQ->lock_flags); - pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag); - spin_lock_irqsave(&circularQ->oq_lock, - circularQ->lock_flags); return; } break; @@ -3008,24 +3000,6 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, ts->stat = SAS_OPEN_TO; break; } - spin_lock_irqsave(&t->task_state_lock, flags); - t->task_state_flags &= ~SAS_TASK_STATE_PENDING; - t->task_state_flags &= ~SAS_TASK_AT_INITIATOR; - t->task_state_flags |= SAS_TASK_STATE_DONE; - if (unlikely((t->task_state_flags & 
SAS_TASK_STATE_ABORTED))) { - spin_unlock_irqrestore(&t->task_state_lock, flags); - pm8001_dbg(pm8001_ha, FAIL, - "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n", - t, event, ts->resp, ts->stat); - pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); - } else { - spin_unlock_irqrestore(&t->task_state_lock, flags); - spin_unlock_irqrestore(&circularQ->oq_lock, - circularQ->lock_flags); - pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag); - spin_lock_irqsave(&circularQ->oq_lock, - circularQ->lock_flags); - } } /*See the comments for mpi_ssp_completion */ @@ -3712,8 +3686,10 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb) break; case HW_EVENT_PORT_RESET_TIMER_TMO: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PORT_RESET_TIMER_TMO\n"); - pm80xx_hw_event_ack_req(pm8001_ha, 0, HW_EVENT_PHY_DOWN, - port_id, phy_id, 0, 0); + if (!pm8001_ha->phy[phy_id].reset_completion) { + pm80xx_hw_event_ack_req(pm8001_ha, 0, HW_EVENT_PHY_DOWN, + port_id, phy_id, 0, 0); + } sas_phy_disconnected(sas_phy); phy->phy_attached = 0; sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR, @@ -3929,6 +3905,7 @@ static int ssp_coalesced_comp_resp(struct pm8001_hba_info *pm8001_ha, /** * process_one_iomb - process one outbound Queue memory block * @pm8001_ha: our hba card information + * @circularQ: outbound circular queue * @piomb: IO message buffer */ static void process_one_iomb(struct pm8001_hba_info *pm8001_ha, @@ -4149,10 +4126,22 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec) u32 ret = MPI_IO_STATUS_FAIL; u32 regval; + /* + * Fatal errors are programmed to be signalled in irq vector + * pm8001_ha->max_q_num - 1 through pm8001_ha->main_cfg_tbl.pm80xx_tbl. + * fatal_err_interrupt + */ if (vec == (pm8001_ha->max_q_num - 1)) { + u32 mipsall_ready; + + if (pm8001_ha->chip_id == chip_8008 || + pm8001_ha->chip_id == chip_8009) + mipsall_ready = SCRATCH_PAD_MIPSALL_READY_8PORT; + else + mipsall_ready = SCRATCH_PAD_MIPSALL_READY_16PORT; + regval = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1); - if ((regval & SCRATCH_PAD_MIPSALL_READY) != - SCRATCH_PAD_MIPSALL_READY) { + if ((regval & mipsall_ready) != mipsall_ready) { pm8001_ha->controller_fatal_error = true; pm8001_dbg(pm8001_ha, FAIL, "Firmware Fatal error! 
Regval:0x%x\n", @@ -5055,4 +5044,5 @@ const struct pm8001_dispatch pm8001_80xx_dispatch = { .fw_flash_update_req = pm8001_chip_fw_flash_update_req, .set_dev_state_req = pm8001_chip_set_dev_state_req, .fatal_errors = pm80xx_fatal_errors, + .hw_event_ack_req = pm80xx_hw_event_ack_req, }; diff --git a/drivers/scsi/pm8001/pm80xx_hwi.h b/drivers/scsi/pm8001/pm80xx_hwi.h index c7e5d93bea92439f06d50a520dd9a7c84e46a22f..c41ed039c92ac9087c4d8e90e0e304406d50f71a 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.h +++ b/drivers/scsi/pm8001/pm80xx_hwi.h @@ -1405,8 +1405,12 @@ typedef struct SASProtocolTimerConfig SASProtocolTimerConfig_t; #define SCRATCH_PAD_BOOT_LOAD_SUCCESS 0x0 #define SCRATCH_PAD_IOP0_READY 0xC00 #define SCRATCH_PAD_IOP1_READY 0x3000 -#define SCRATCH_PAD_MIPSALL_READY (SCRATCH_PAD_IOP1_READY | \ +#define SCRATCH_PAD_MIPSALL_READY_16PORT (SCRATCH_PAD_IOP1_READY | \ SCRATCH_PAD_IOP0_READY | \ + SCRATCH_PAD_ILA_READY | \ + SCRATCH_PAD_RAAE_READY) +#define SCRATCH_PAD_MIPSALL_READY_8PORT (SCRATCH_PAD_IOP0_READY | \ + SCRATCH_PAD_ILA_READY | \ SCRATCH_PAD_RAAE_READY) /* boot loader state */ diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 99a56ca1fb1637ab44f4ff470de761100927c763..fab43dabe5b31760c95dbdb726b6d8ba59cf8636 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -2250,6 +2250,7 @@ process_els: io_req->tm_flags == FCP_TMF_TGT_RESET) { clear_bit(QEDF_CMD_OUTSTANDING, &io_req->flags); io_req->sc_cmd = NULL; + kref_put(&io_req->refcount, qedf_release_cmd); complete(&io_req->tm_done); } diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index 1bf7a22d494809ea0d846f5e4701245744c575c1..6ad28bc8e948310b45a628c1dd9c371ad014ae46 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -911,7 +911,7 @@ void qedf_ctx_soft_reset(struct fc_lport *lport) struct qed_link_output if_link; if (lport->vport) { - QEDF_ERR(NULL, "Cannot issue host reset on NPIV port.\n"); + printk_ratelimited("Cannot issue host reset on NPIV port.\n"); return; } @@ -1415,6 +1415,8 @@ static void qedf_upload_connection(struct qedf_ctx *qedf, */ term_params = dma_alloc_coherent(&qedf->pdev->dev, QEDF_TERM_BUFF_SIZE, &term_params_dma, GFP_KERNEL); + if (!term_params) + return; QEDF_INFO(&(qedf->dbg_ctx), QEDF_LOG_CONN, "Uploading connection " "port_id=%06x.\n", fcport->rdata->ids.port_id); @@ -1862,6 +1864,7 @@ static int qedf_vport_create(struct fc_vport *vport, bool disabled) vport_qedf->cmd_mgr = base_qedf->cmd_mgr; init_completion(&vport_qedf->flogi_compl); INIT_LIST_HEAD(&vport_qedf->fcports); + INIT_DELAYED_WORK(&vport_qedf->stag_work, qedf_stag_change_work); rc = qedf_vport_libfc_config(vport, vn_port); if (rc) { @@ -3978,7 +3981,9 @@ void qedf_stag_change_work(struct work_struct *work) struct qedf_ctx *qedf = container_of(work, struct qedf_ctx, stag_work.work); - QEDF_ERR(&qedf->dbg_ctx, "Performing software context reset.\n"); + printk_ratelimited("[%s]:[%s:%d]:%d: Performing software context reset.", + dev_name(&qedf->pdev->dev), __func__, __LINE__, + qedf->dbg_ctx.host_no); qedf_ctx_soft_reset(qedf->lport); } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 35e381f6d371e6b9ce0cb8f60b90e2c79869ef2f..0a70aa763a961f7a0d2a31f7e192c14b045722a8 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2067,7 +2067,6 @@ void scsi_exit_queue(void) * @sdev: SCSI device to be queried * @pf: Page format bit (1 == standard, 0 == vendor specific) * @sp: Save page bit (0 == don't save, 1 == 
save) - * @modepage: mode page being requested * @buffer: request buffer (may not be smaller than eight bytes) * @len: length of request buffer. * @timeout: command timeout @@ -2080,10 +2079,9 @@ void scsi_exit_queue(void) * status on error * */ -int -scsi_mode_select(struct scsi_device *sdev, int pf, int sp, int modepage, - unsigned char *buffer, int len, int timeout, int retries, - struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr) +int scsi_mode_select(struct scsi_device *sdev, int pf, int sp, + unsigned char *buffer, int len, int timeout, int retries, + struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr) { unsigned char cmd[10]; unsigned char *real_buffer; diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c index d6982d3557396b02fd89f5a664450a8a50a37719..95aee1ad1383408482fd4000893b8f141ee5dea5 100644 --- a/drivers/scsi/scsi_proc.c +++ b/drivers/scsi/scsi_proc.c @@ -49,7 +49,7 @@ static DEFINE_MUTEX(global_host_template_mutex); static ssize_t proc_scsi_host_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct Scsi_Host *shost = PDE_DATA(file_inode(file)); + struct Scsi_Host *shost = pde_data(file_inode(file)); ssize_t ret = -ENOMEM; char *page; @@ -79,7 +79,7 @@ static int proc_scsi_show(struct seq_file *m, void *v) static int proc_scsi_host_open(struct inode *inode, struct file *file) { - return single_open_size(file, proc_scsi_show, PDE_DATA(inode), + return single_open_size(file, proc_scsi_show, pde_data(inode), 4 * PAGE_SIZE); } diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 3520b9384428941a7ae9a06371b2810f66978d13..f4e6c68ac99eddad151716e36e075347ddb54d1d 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -214,6 +214,48 @@ static void scsi_unlock_floptical(struct scsi_device *sdev, SCSI_TIMEOUT, 3, NULL); } +static int scsi_realloc_sdev_budget_map(struct scsi_device *sdev, + unsigned int depth) +{ + int new_shift = sbitmap_calculate_shift(depth); + bool need_alloc = !sdev->budget_map.map; + bool need_free = false; + int ret; + struct sbitmap sb_backup; + + /* + * realloc if new shift is calculated, which is caused by setting + * up one new default queue depth after calling ->slave_configure + */ + if (!need_alloc && new_shift != sdev->budget_map.shift) + need_alloc = need_free = true; + + if (!need_alloc) + return 0; + + /* + * Request queue has to be frozen for reallocating budget map, + * and here disk isn't added yet, so freezing is pretty fast + */ + if (need_free) { + blk_mq_freeze_queue(sdev->request_queue); + sb_backup = sdev->budget_map; + } + ret = sbitmap_init_node(&sdev->budget_map, + scsi_device_max_queue_depth(sdev), + new_shift, GFP_KERNEL, + sdev->request_queue->node, false, true); + if (need_free) { + if (ret) + sdev->budget_map = sb_backup; + else + sbitmap_free(&sb_backup); + ret = 0; + blk_mq_unfreeze_queue(sdev->request_queue); + } + return ret; +} + /** * scsi_alloc_sdev - allocate and setup a scsi_Device * @starget: which target to allocate a &scsi_device for @@ -306,11 +348,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, * default device queue depth to figure out sbitmap shift * since we use this queue depth most of times. 
*/ - if (sbitmap_init_node(&sdev->budget_map, - scsi_device_max_queue_depth(sdev), - sbitmap_calculate_shift(depth), - GFP_KERNEL, sdev->request_queue->node, - false, true)) { + if (scsi_realloc_sdev_budget_map(sdev, depth)) { put_device(&starget->dev); kfree(sdev); goto out; @@ -1017,6 +1055,13 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, } return SCSI_SCAN_NO_RESPONSE; } + + /* + * The queue_depth is often changed in ->slave_configure. + * Set up budget map again since memory consumption of + * the map depends on actual queue depth. + */ + scsi_realloc_sdev_budget_map(sdev, sdev->queue_depth); } if (sdev->scsi_level >= SCSI_3) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 0e73c3f2f381f11549cc03ed852d9cb6785a668c..62eb9921cc947b8cb8e083ea0491434ca4fc0455 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -209,7 +209,7 @@ cache_type_store(struct device *dev, struct device_attribute *attr, */ data.device_specific = 0; - if (scsi_mode_select(sdp, 1, sp, 8, buffer_data, len, SD_TIMEOUT, + if (scsi_mode_select(sdp, 1, sp, buffer_data, len, SD_TIMEOUT, sdkp->max_retries, &data, &sshdr)) { if (scsi_sense_valid(&sshdr)) sd_print_sense_hdr(sdkp, &sshdr); diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index ad12b3261845bf92bb5b9b09c3f664f9041ade59..6b43e97bd417afd134a8f6936e81adcdbc14eded 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -77,7 +77,7 @@ static int sg_proc_init(void); #define SG_DEFAULT_TIMEOUT mult_frac(SG_DEFAULT_TIMEOUT_USER, HZ, USER_HZ) -int sg_big_buff = SG_DEF_RESERVED_SIZE; +static int sg_big_buff = SG_DEF_RESERVED_SIZE; /* N.B. This variable is readable and writeable via /proc/scsi/sg/def_reserved_size . Each time sg_open() is called a buffer of this size (or less if there is not enough memory) will be reserved @@ -1634,6 +1634,37 @@ MODULE_PARM_DESC(scatter_elem_sz, "scatter gather element " MODULE_PARM_DESC(def_reserved_size, "size of buffer reserved for each fd"); MODULE_PARM_DESC(allow_dio, "allow direct I/O (default: 0 (disallow))"); +#ifdef CONFIG_SYSCTL +#include + +static struct ctl_table sg_sysctls[] = { + { + .procname = "sg-big-buff", + .data = &sg_big_buff, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, + {} +}; + +static struct ctl_table_header *hdr; +static void register_sg_sysctls(void) +{ + if (!hdr) + hdr = register_sysctl("kernel", sg_sysctls); +} + +static void unregister_sg_sysctls(void) +{ + if (hdr) + unregister_sysctl_table(hdr); +} +#else +#define register_sg_sysctls() do { } while (0) +#define unregister_sg_sysctls() do { } while (0) +#endif /* CONFIG_SYSCTL */ + static int __init init_sg(void) { @@ -1666,6 +1697,7 @@ init_sg(void) return 0; } class_destroy(sg_sysfs_class); + register_sg_sysctls(); err_out: unregister_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), SG_MAX_DEVS); return rc; @@ -1674,6 +1706,7 @@ err_out: static void __exit exit_sg(void) { + unregister_sg_sysctls(); #ifdef CONFIG_SCSI_PROC_FS remove_proc_subtree("scsi/sg", NULL); #endif /* CONFIG_SCSI_PROC_FS */ diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index 5393b5c9dd9c87dfbb929608bd84738cbdc162fb..86a938075f308bd41d568127b93214fb129154e2 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -557,7 +557,7 @@ static void ufs_mtk_init_va09_pwr_ctrl(struct ufs_hba *hba) struct ufs_mtk_host *host = ufshcd_get_variant(hba); host->reg_va09 = regulator_get(hba->dev, "va09"); - if (!host->reg_va09) + if (IS_ERR(host->reg_va09)) 
dev_info(hba->dev, "failed to get va09"); else host->caps |= UFS_MTK_CAP_VA09_PWR_CTRL; diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c index 8b16bbbcb806c7377e07964909259f0493fdd981..87975d1a21c8b1f2f227355181bd5b3d546e0d93 100644 --- a/drivers/scsi/ufs/ufshcd-pltfrm.c +++ b/drivers/scsi/ufs/ufshcd-pltfrm.c @@ -92,6 +92,11 @@ static int ufshcd_parse_clock_info(struct ufs_hba *hba) clki->min_freq = clkfreq[i]; clki->max_freq = clkfreq[i+1]; clki->name = devm_kstrdup(dev, name, GFP_KERNEL); + if (!clki->name) { + ret = -ENOMEM; + goto out; + } + if (!strcmp(name, "ref_clk")) clki->keep_link_active = true; dev_dbg(dev, "%s: min %u max %u name %s\n", "freq-table-hz", @@ -127,6 +132,8 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name, return -ENOMEM; vreg->name = devm_kstrdup(dev, name, GFP_KERNEL); + if (!vreg->name) + return -ENOMEM; snprintf(prop_name, MAX_PROP_SIZE, "%s-max-microamp", name); if (of_property_read_u32(np, prop_name, &vreg->max_uA)) { diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 1049e41abd5b2d0a7e87c454bf8b5f4191f5d84b..50b12d60dc1b2deae027c1a476773fca56a6261a 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -7815,7 +7815,7 @@ static int ufshcd_quirk_tune_host_pa_tactivate(struct ufs_hba *hba) peer_pa_tactivate_us = peer_pa_tactivate * gran_to_us_table[peer_granularity - 1]; - if (pa_tactivate_us > peer_pa_tactivate_us) { + if (pa_tactivate_us >= peer_pa_tactivate_us) { u32 new_peer_pa_tactivate; new_peer_pa_tactivate = pa_tactivate_us / @@ -8613,7 +8613,7 @@ static void ufshcd_hba_exit(struct ufs_hba *hba) * @pwr_mode: device power mode to set * * Returns 0 if requested power mode is set successfully - * Returns non-zero if failed to set the requested power mode + * Returns < 0 if failed to set the requested power mode */ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, enum ufs_dev_pwr_mode pwr_mode) @@ -8667,8 +8667,11 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, sdev_printk(KERN_WARNING, sdp, "START_STOP failed for power mode: %d, result %x\n", pwr_mode, ret); - if (ret > 0 && scsi_sense_valid(&sshdr)) - scsi_print_sense_hdr(sdp, NULL, &sshdr); + if (ret > 0) { + if (scsi_sense_valid(&sshdr)) + scsi_print_sense_hdr(sdp, NULL, &sshdr); + ret = -EIO; + } } if (!ret) diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h index 6a295c88d850f840eeaa36daf16f388f55a77ea2..a7ff0e5b54946d3e367c6c482c19bfea78462b19 100644 --- a/drivers/scsi/ufs/ufshci.h +++ b/drivers/scsi/ufs/ufshci.h @@ -142,7 +142,8 @@ static inline u32 ufshci_version(u32 major, u32 minor) #define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\ CONTROLLER_FATAL_ERROR |\ SYSTEM_BUS_FATAL_ERROR |\ - CRYPTO_ENGINE_FATAL_ERROR) + CRYPTO_ENGINE_FATAL_ERROR |\ + UIC_LINK_LOST) /* HCS - Host Controller Status 30h */ #define DEVICE_PRESENT 0x1 diff --git a/drivers/soc/aspeed/aspeed-lpc-ctrl.c b/drivers/soc/aspeed/aspeed-lpc-ctrl.c index 72771e018c42eae059d2c7adaf2c4cfd7e412856..258894ed234b3ddc3709a7d75251d45c55eeb862 100644 --- a/drivers/soc/aspeed/aspeed-lpc-ctrl.c +++ b/drivers/soc/aspeed/aspeed-lpc-ctrl.c @@ -306,10 +306,9 @@ static int aspeed_lpc_ctrl_probe(struct platform_device *pdev) } lpc_ctrl->clk = devm_clk_get(dev, NULL); - if (IS_ERR(lpc_ctrl->clk)) { - dev_err(dev, "couldn't get clock\n"); - return PTR_ERR(lpc_ctrl->clk); - } + if (IS_ERR(lpc_ctrl->clk)) + return dev_err_probe(dev, PTR_ERR(lpc_ctrl->clk), + "couldn't get clock\n"); rc = clk_prepare_enable(lpc_ctrl->clk); if 
(rc) { dev_err(dev, "couldn't enable clock\n"); diff --git a/drivers/soc/canaan/Kconfig b/drivers/soc/canaan/Kconfig index 853096b7e84c39e6241250b12b28528e83d5804f..2527cf5757ec9693d0a698d0c32a5fca71466a08 100644 --- a/drivers/soc/canaan/Kconfig +++ b/drivers/soc/canaan/Kconfig @@ -5,7 +5,6 @@ config SOC_K210_SYSCTL depends on RISCV && SOC_CANAAN && OF default SOC_CANAAN select PM - select SYSCON select MFD_SYSCON help Canaan Kendryte K210 SoC system controller driver. diff --git a/drivers/soc/fsl/qbman/bman_portal.c b/drivers/soc/fsl/qbman/bman_portal.c index acda8a5637c52066891aa3ab1f480a397cc8372a..4d7b9caee1c4719675191b8348099e410a756b3a 100644 --- a/drivers/soc/fsl/qbman/bman_portal.c +++ b/drivers/soc/fsl/qbman/bman_portal.c @@ -155,7 +155,7 @@ static int bman_portal_probe(struct platform_device *pdev) } spin_lock(&bman_lock); - cpu = cpumask_next_zero(-1, &portal_cpus); + cpu = cpumask_first_zero(&portal_cpus); if (cpu >= nr_cpu_ids) { __bman_portals_probed = 1; /* unassigned portal, skip init */ diff --git a/drivers/soc/fsl/qbman/qman_portal.c b/drivers/soc/fsl/qbman/qman_portal.c index 96f74a1dc60355dcf01f33641cba79c66f393567..e23b60618c1a15aa7f89f655ded6bdbb732ae2a5 100644 --- a/drivers/soc/fsl/qbman/qman_portal.c +++ b/drivers/soc/fsl/qbman/qman_portal.c @@ -248,7 +248,7 @@ static int qman_portal_probe(struct platform_device *pdev) pcfg->pools = qm_get_pools_sdqcr(); spin_lock(&qman_lock); - cpu = cpumask_next_zero(-1, &portal_cpus); + cpu = cpumask_first_zero(&portal_cpus); if (cpu >= nr_cpu_ids) { __qman_portals_probed = 1; /* unassigned portal, skip init */ diff --git a/drivers/soc/samsung/Kconfig b/drivers/soc/samsung/Kconfig index a9f8b224322e42f1ae9ddb7b7b1976a0a7a8f4eb..02e319508cc6d71d1c4aba57c34b0d1e059f28a4 100644 --- a/drivers/soc/samsung/Kconfig +++ b/drivers/soc/samsung/Kconfig @@ -31,7 +31,7 @@ config EXYNOS_USI help Enable support for USI block. USI (Universal Serial Interface) is an IP-core found in modern Samsung Exynos SoCs, like Exynos850 and - ExynosAutoV0. USI block can be configured to provide one of the + ExynosAutoV9. USI block can be configured to provide one of the following serial protocols: UART, SPI or High Speed I2C. 
This driver allows one to configure USI for desired protocol, which diff --git a/drivers/soc/ti/k3-ringacc.c b/drivers/soc/ti/k3-ringacc.c index 56be391614893e7332f836e7400eb360734de586..31ab6c657fec17e15677dcfda19d08386011d4ba 100644 --- a/drivers/soc/ti/k3-ringacc.c +++ b/drivers/soc/ti/k3-ringacc.c @@ -358,8 +358,8 @@ struct k3_ring *k3_ringacc_request_ring(struct k3_ringacc *ringacc, goto out; if (flags & K3_RINGACC_RING_USE_PROXY) { - proxy_id = find_next_zero_bit(ringacc->proxy_inuse, - ringacc->num_proxies, 0); + proxy_id = find_first_zero_bit(ringacc->proxy_inuse, + ringacc->num_proxies); if (proxy_id == ringacc->num_proxies) goto error; } diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index c9a769b8594b71f6d696dbf8657c71ead2f8e9b3..86c76211b3d3dd4f735cd788d97159514a8a1250 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -585,7 +585,7 @@ static void bcm_qspi_chip_select(struct bcm_qspi *qspi, int cs) u32 rd = 0; u32 wr = 0; - if (qspi->base[CHIP_SELECT]) { + if (cs >= 0 && qspi->base[CHIP_SELECT]) { rd = bcm_qspi_read(qspi, CHIP_SELECT, 0); wr = (rd & ~0xff) | (1 << cs); if (rd == wr) diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index c208efeadd1847a4be807ce4dc886851f031dc35..0bc7daa7afc83d0d20424800845bb16f1c2c473f 100644 --- a/drivers/spi/spi-meson-spicc.c +++ b/drivers/spi/spi-meson-spicc.c @@ -693,6 +693,11 @@ static int meson_spicc_probe(struct platform_device *pdev) writel_relaxed(0, spicc->base + SPICC_INTREG); irq = platform_get_irq(pdev, 0); + if (irq < 0) { + ret = irq; + goto out_master; + } + ret = devm_request_irq(&pdev->dev, irq, meson_spicc_irq, 0, NULL, spicc); if (ret) { diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index a15de10ee286a9e4f7760295fc2420fd8b51c68c..753bd313e6fdaaabe4063fc068e84d8d05b42983 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -624,7 +624,7 @@ static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id) else mdata->state = MTK_SPI_IDLE; - if (!master->can_dma(master, master->cur_msg->spi, trans)) { + if (!master->can_dma(master, NULL, trans)) { if (trans->rx_buf) { cnt = mdata->xfer_len / 4; ioread32_rep(mdata->base + SPI_RX_DATA_REG, diff --git a/drivers/spi/spi-stm32-qspi.c b/drivers/spi/spi-stm32-qspi.c index 514337c86d2c3f4b3e1f6b064fcc90af7997db7e..ffdc55f87e8210bb36da18e9658214eba006b6c5 100644 --- a/drivers/spi/spi-stm32-qspi.c +++ b/drivers/spi/spi-stm32-qspi.c @@ -688,7 +688,7 @@ static int stm32_qspi_probe(struct platform_device *pdev) struct resource *res; int ret, irq; - ctrl = spi_alloc_master(dev, sizeof(*qspi)); + ctrl = devm_spi_alloc_master(dev, sizeof(*qspi)); if (!ctrl) return -ENOMEM; @@ -697,58 +697,46 @@ static int stm32_qspi_probe(struct platform_device *pdev) res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi"); qspi->io_base = devm_ioremap_resource(dev, res); - if (IS_ERR(qspi->io_base)) { - ret = PTR_ERR(qspi->io_base); - goto err_master_put; - } + if (IS_ERR(qspi->io_base)) + return PTR_ERR(qspi->io_base); qspi->phys_base = res->start; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qspi_mm"); qspi->mm_base = devm_ioremap_resource(dev, res); - if (IS_ERR(qspi->mm_base)) { - ret = PTR_ERR(qspi->mm_base); - goto err_master_put; - } + if (IS_ERR(qspi->mm_base)) + return PTR_ERR(qspi->mm_base); qspi->mm_size = resource_size(res); - if (qspi->mm_size > STM32_QSPI_MAX_MMAP_SZ) { - ret = -EINVAL; - goto err_master_put; - } + if (qspi->mm_size > STM32_QSPI_MAX_MMAP_SZ) + return 
-EINVAL; irq = platform_get_irq(pdev, 0); - if (irq < 0) { - ret = irq; - goto err_master_put; - } + if (irq < 0) + return irq; ret = devm_request_irq(dev, irq, stm32_qspi_irq, 0, dev_name(dev), qspi); if (ret) { dev_err(dev, "failed to request irq\n"); - goto err_master_put; + return ret; } init_completion(&qspi->data_completion); init_completion(&qspi->match_completion); qspi->clk = devm_clk_get(dev, NULL); - if (IS_ERR(qspi->clk)) { - ret = PTR_ERR(qspi->clk); - goto err_master_put; - } + if (IS_ERR(qspi->clk)) + return PTR_ERR(qspi->clk); qspi->clk_rate = clk_get_rate(qspi->clk); - if (!qspi->clk_rate) { - ret = -EINVAL; - goto err_master_put; - } + if (!qspi->clk_rate) + return -EINVAL; ret = clk_prepare_enable(qspi->clk); if (ret) { dev_err(dev, "can not enable the clock\n"); - goto err_master_put; + return ret; } rstc = devm_reset_control_get_exclusive(dev, NULL); @@ -784,7 +772,7 @@ static int stm32_qspi_probe(struct platform_device *pdev) pm_runtime_enable(dev); pm_runtime_get_noresume(dev); - ret = devm_spi_register_master(dev, ctrl); + ret = spi_register_master(ctrl); if (ret) goto err_pm_runtime_free; @@ -806,8 +794,6 @@ err_dma_free: stm32_qspi_dma_free(qspi); err_clk_disable: clk_disable_unprepare(qspi->clk); -err_master_put: - spi_master_put(qspi->ctrl); return ret; } @@ -817,6 +803,7 @@ static int stm32_qspi_remove(struct platform_device *pdev) struct stm32_qspi *qspi = platform_get_drvdata(pdev); pm_runtime_get_sync(qspi->dev); + spi_unregister_master(qspi->ctrl); /* disable qspi */ writel_relaxed(0, qspi->io_base + QSPI_CR); stm32_qspi_dma_free(qspi); diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 9bd3fd1652f744bc6132198414a2e790f3866a81..7fc24505a72cdd0db46dcb293fdcbe0d2bcd95f2 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -221,7 +221,6 @@ struct stm32_spi; * time between frames (if driver has this functionality) * @set_number_of_data: optional routine to configure registers to desired * number of data (if driver has this functionality) - * @can_dma: routine to determine if the transfer is eligible for DMA use * @transfer_one_dma_start: routine to start transfer a single spi_transfer * using DMA * @dma_rx_cb: routine to call after DMA RX channel operation is complete @@ -232,7 +231,7 @@ struct stm32_spi; * @baud_rate_div_min: minimum baud rate divisor * @baud_rate_div_max: maximum baud rate divisor * @has_fifo: boolean to know if fifo is used for driver - * @has_startbit: boolean to know if start bit is used to start transfer + * @flags: compatible specific SPI controller flags used at registration time */ struct stm32_spi_cfg { const struct stm32_spi_regspec *regs; @@ -253,6 +252,7 @@ struct stm32_spi_cfg { unsigned int baud_rate_div_min; unsigned int baud_rate_div_max; bool has_fifo; + u16 flags; }; /** @@ -1722,6 +1722,7 @@ static const struct stm32_spi_cfg stm32f4_spi_cfg = { .baud_rate_div_min = STM32F4_SPI_BR_DIV_MIN, .baud_rate_div_max = STM32F4_SPI_BR_DIV_MAX, .has_fifo = false, + .flags = SPI_MASTER_MUST_TX, }; static const struct stm32_spi_cfg stm32h7_spi_cfg = { @@ -1854,7 +1855,7 @@ static int stm32_spi_probe(struct platform_device *pdev) master->prepare_message = stm32_spi_prepare_msg; master->transfer_one = stm32_spi_transfer_one; master->unprepare_message = stm32_spi_unprepare_msg; - master->flags = SPI_MASTER_MUST_TX; + master->flags = spi->cfg->flags; spi->dma_tx = dma_request_chan(spi->dev, "tx"); if (IS_ERR(spi->dma_tx)) { diff --git a/drivers/spi/spi-uniphier.c b/drivers/spi/spi-uniphier.c index 
342ee8d2c47615060c98b83dae0adf8a27646898..cc0da48222311b094e90dc92cb141ef3c0eef1aa 100644 --- a/drivers/spi/spi-uniphier.c +++ b/drivers/spi/spi-uniphier.c @@ -726,7 +726,7 @@ static int uniphier_spi_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "failed to get TX DMA capacities: %d\n", ret); - goto out_disable_clk; + goto out_release_dma; } dma_tx_burst = caps.max_burst; } @@ -735,7 +735,7 @@ static int uniphier_spi_probe(struct platform_device *pdev) if (IS_ERR_OR_NULL(master->dma_rx)) { if (PTR_ERR(master->dma_rx) == -EPROBE_DEFER) { ret = -EPROBE_DEFER; - goto out_disable_clk; + goto out_release_dma; } master->dma_rx = NULL; dma_rx_burst = INT_MAX; @@ -744,7 +744,7 @@ static int uniphier_spi_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "failed to get RX DMA capacities: %d\n", ret); - goto out_disable_clk; + goto out_release_dma; } dma_rx_burst = caps.max_burst; } @@ -753,10 +753,20 @@ static int uniphier_spi_probe(struct platform_device *pdev) ret = devm_spi_register_master(&pdev->dev, master); if (ret) - goto out_disable_clk; + goto out_release_dma; return 0; +out_release_dma: + if (!IS_ERR_OR_NULL(master->dma_rx)) { + dma_release_channel(master->dma_rx); + master->dma_rx = NULL; + } + if (!IS_ERR_OR_NULL(master->dma_tx)) { + dma_release_channel(master->dma_tx); + master->dma_tx = NULL; + } + out_disable_clk: clk_disable_unprepare(priv->clk); diff --git a/drivers/staging/fbtft/fbtft.h b/drivers/staging/fbtft/fbtft.h index 4cdec34e23d2ebb7bd7e1c36b195e8e9d7263ca8..55677efc01380a78f576be27af41a9825bc2279c 100644 --- a/drivers/staging/fbtft/fbtft.h +++ b/drivers/staging/fbtft/fbtft.h @@ -334,7 +334,10 @@ static int __init fbtft_driver_module_init(void) \ ret = spi_register_driver(&fbtft_driver_spi_driver); \ if (ret < 0) \ return ret; \ - return platform_driver_register(&fbtft_driver_platform_driver); \ + ret = platform_driver_register(&fbtft_driver_platform_driver); \ + if (ret < 0) \ + spi_unregister_driver(&fbtft_driver_spi_driver); \ + return ret; \ } \ \ static void __exit fbtft_driver_module_exit(void) \ diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 6759a626150014829faec8f58eaf0585eacfb324..3a2e4582db8e146143f640bc8f6358660814e4b4 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -1058,15 +1058,27 @@ service_callback(enum vchiq_reason reason, struct vchiq_header *header, DEBUG_TRACE(SERVICE_CALLBACK_LINE); + rcu_read_lock(); service = handle_to_service(handle); - if (WARN_ON(!service)) + if (WARN_ON(!service)) { + rcu_read_unlock(); return VCHIQ_SUCCESS; + } user_service = (struct user_service *)service->base.userdata; instance = user_service->instance; - if (!instance || instance->closing) + if (!instance || instance->closing) { + rcu_read_unlock(); return VCHIQ_SUCCESS; + } + + /* + * Since we hop between different synchronization mechanisms here, + * taking an extra reference results in a simpler implementation.
+ */ + vchiq_service_get(service); + rcu_read_unlock(); vchiq_log_trace(vchiq_arm_log_level, "%s - service %lx(%d,%p), reason %d, header %lx, instance %lx, bulk_userdata %lx", @@ -1097,6 +1109,7 @@ service_callback(enum vchiq_reason reason, struct vchiq_header *header, bulk_userdata); if (status != VCHIQ_SUCCESS) { DEBUG_TRACE(SERVICE_CALLBACK_LINE); + vchiq_service_put(service); return status; } } @@ -1105,10 +1118,12 @@ service_callback(enum vchiq_reason reason, struct vchiq_header *header, if (wait_for_completion_interruptible(&user_service->remove_event)) { vchiq_log_info(vchiq_arm_log_level, "%s interrupted", __func__); DEBUG_TRACE(SERVICE_CALLBACK_LINE); + vchiq_service_put(service); return VCHIQ_RETRY; } else if (instance->closing) { vchiq_log_info(vchiq_arm_log_level, "%s closing", __func__); DEBUG_TRACE(SERVICE_CALLBACK_LINE); + vchiq_service_put(service); return VCHIQ_ERROR; } DEBUG_TRACE(SERVICE_CALLBACK_LINE); @@ -1137,6 +1152,7 @@ service_callback(enum vchiq_reason reason, struct vchiq_header *header, header = NULL; } DEBUG_TRACE(SERVICE_CALLBACK_LINE); + vchiq_service_put(service); if (skip_completion) return VCHIQ_SUCCESS; diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 8075f60fd02c337a5fcb0efc8ad74a066737cfb3..2d5cf1714ae05de28eb17ba66db9b5be12338e98 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -443,6 +443,9 @@ static bool iscsit_tpg_check_network_portal( break; } spin_unlock(&tpg->tpg_np_lock); + + if (match) + break; } spin_unlock(&tiqn->tiqn_tpg_lock); diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index 1ca320885fad77ad9cbc3d3b4ac1cffe652d6fd4..17a6f51d30892f402d6675b824bb3ab97d4ae614 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -158,6 +158,7 @@ void optee_remove_common(struct optee *optee) optee_unregister_devices(); optee_notif_uninit(optee); + teedev_close_context(optee->ctx); /* * The two devices have to be unregistered before we can free the * other resources. 
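The vchiq_arm hunk above is an instance of a common kernel lifetime idiom: look the object up under rcu_read_lock(), pin it with a real reference before leaving the RCU read-side section, and only then sleep or wait on completions. A minimal sketch of that idiom follows; the session type, session_lookup() helper, and log message are hypothetical illustrations, not code from the driver.

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/printk.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct session {
	struct kref ref;
	struct rcu_head rcu;
	int id;
};

static void session_release(struct kref *kref)
{
	struct session *s = container_of(kref, struct session, ref);

	/* Defer the actual free until all current RCU readers are done. */
	kfree_rcu(s, rcu);
}

/* Hypothetical RCU-protected lookup, e.g. backed by an xarray or idr. */
static struct session *session_lookup(int handle);

static int session_use(int handle)
{
	struct session *s;

	rcu_read_lock();
	s = session_lookup(handle);
	/*
	 * Pin the object before leaving the read-side critical section,
	 * refusing objects whose refcount has already dropped to zero.
	 */
	if (!s || !kref_get_unless_zero(&s->ref)) {
		rcu_read_unlock();
		return -ENOENT;
	}
	rcu_read_unlock();

	/* The reference keeps *s valid even if we block here. */
	pr_info("using session %d\n", s->id);

	kref_put(&s->ref, session_release);
	return 0;
}

Holding the reference across the blocking region is what lets the callback drop the RCU lock early; the matching puts on every exit path, as added in the patch, keep the count balanced.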
diff --git a/drivers/tee/optee/ffa_abi.c b/drivers/tee/optee/ffa_abi.c index 20a1b1a3d96527bffa67a6832fab1d09aa6d40cd..f2bf6c61197fb888f63b9da5d3146e2b38d2784c 100644 --- a/drivers/tee/optee/ffa_abi.c +++ b/drivers/tee/optee/ffa_abi.c @@ -424,6 +424,7 @@ static struct tee_shm_pool_mgr *optee_ffa_shm_pool_alloc_pages(void) */ static void handle_ffa_rpc_func_cmd_shm_alloc(struct tee_context *ctx, + struct optee *optee, struct optee_msg_arg *arg) { struct tee_shm *shm; @@ -439,7 +440,7 @@ static void handle_ffa_rpc_func_cmd_shm_alloc(struct tee_context *ctx, shm = optee_rpc_cmd_alloc_suppl(ctx, arg->params[0].u.value.b); break; case OPTEE_RPC_SHM_TYPE_KERNEL: - shm = tee_shm_alloc(ctx, arg->params[0].u.value.b, + shm = tee_shm_alloc(optee->ctx, arg->params[0].u.value.b, TEE_SHM_MAPPED | TEE_SHM_PRIV); break; default: @@ -493,14 +494,13 @@ err_bad_param: } static void handle_ffa_rpc_func_cmd(struct tee_context *ctx, + struct optee *optee, struct optee_msg_arg *arg) { - struct optee *optee = tee_get_drvdata(ctx->teedev); - arg->ret_origin = TEEC_ORIGIN_COMMS; switch (arg->cmd) { case OPTEE_RPC_CMD_SHM_ALLOC: - handle_ffa_rpc_func_cmd_shm_alloc(ctx, arg); + handle_ffa_rpc_func_cmd_shm_alloc(ctx, optee, arg); break; case OPTEE_RPC_CMD_SHM_FREE: handle_ffa_rpc_func_cmd_shm_free(ctx, optee, arg); @@ -510,12 +510,12 @@ static void handle_ffa_rpc_func_cmd(struct tee_context *ctx, } } -static void optee_handle_ffa_rpc(struct tee_context *ctx, u32 cmd, - struct optee_msg_arg *arg) +static void optee_handle_ffa_rpc(struct tee_context *ctx, struct optee *optee, + u32 cmd, struct optee_msg_arg *arg) { switch (cmd) { case OPTEE_FFA_YIELDING_CALL_RETURN_RPC_CMD: - handle_ffa_rpc_func_cmd(ctx, arg); + handle_ffa_rpc_func_cmd(ctx, optee, arg); break; case OPTEE_FFA_YIELDING_CALL_RETURN_INTERRUPT: /* Interrupt delivered by now */ @@ -582,7 +582,7 @@ static int optee_ffa_yielding_call(struct tee_context *ctx, * above. 
*/ cond_resched(); - optee_handle_ffa_rpc(ctx, data->data1, rpc_arg); + optee_handle_ffa_rpc(ctx, optee, data->data1, rpc_arg); cmd = OPTEE_FFA_YIELDING_CALL_RESUME; data->data0 = cmd; data->data1 = 0; @@ -619,9 +619,18 @@ static int optee_ffa_do_call_with_arg(struct tee_context *ctx, .data2 = (u32)(shm->sec_world_id >> 32), .data3 = shm->offset, }; - struct optee_msg_arg *arg = tee_shm_get_va(shm, 0); - unsigned int rpc_arg_offs = OPTEE_MSG_GET_ARG_SIZE(arg->num_params); - struct optee_msg_arg *rpc_arg = tee_shm_get_va(shm, rpc_arg_offs); + struct optee_msg_arg *arg; + unsigned int rpc_arg_offs; + struct optee_msg_arg *rpc_arg; + + arg = tee_shm_get_va(shm, 0); + if (IS_ERR(arg)) + return PTR_ERR(arg); + + rpc_arg_offs = OPTEE_MSG_GET_ARG_SIZE(arg->num_params); + rpc_arg = tee_shm_get_va(shm, rpc_arg_offs); + if (IS_ERR(rpc_arg)) + return PTR_ERR(rpc_arg); return optee_ffa_yielding_call(ctx, &data, rpc_arg); } @@ -793,7 +802,9 @@ static int optee_ffa_probe(struct ffa_device *ffa_dev) { const struct ffa_dev_ops *ffa_ops; unsigned int rpc_arg_count; + struct tee_shm_pool *pool; struct tee_device *teedev; + struct tee_context *ctx; struct optee *optee; int rc; @@ -813,12 +824,12 @@ static int optee_ffa_probe(struct ffa_device *ffa_dev) if (!optee) return -ENOMEM; - optee->pool = optee_ffa_config_dyn_shm(); - if (IS_ERR(optee->pool)) { - rc = PTR_ERR(optee->pool); - optee->pool = NULL; - goto err; + pool = optee_ffa_config_dyn_shm(); + if (IS_ERR(pool)) { + rc = PTR_ERR(pool); + goto err_free_optee; } + optee->pool = pool; optee->ops = &optee_ffa_ops; optee->ffa.ffa_dev = ffa_dev; @@ -829,7 +840,7 @@ static int optee_ffa_probe(struct ffa_device *ffa_dev) optee); if (IS_ERR(teedev)) { rc = PTR_ERR(teedev); - goto err; + goto err_free_pool; } optee->teedev = teedev; @@ -837,50 +848,57 @@ static int optee_ffa_probe(struct ffa_device *ffa_dev) optee); if (IS_ERR(teedev)) { rc = PTR_ERR(teedev); - goto err; + goto err_unreg_teedev; } optee->supp_teedev = teedev; rc = tee_device_register(optee->teedev); if (rc) - goto err; + goto err_unreg_supp_teedev; rc = tee_device_register(optee->supp_teedev); if (rc) - goto err; + goto err_unreg_supp_teedev; rc = rhashtable_init(&optee->ffa.global_ids, &shm_rhash_params); if (rc) - goto err; + goto err_unreg_supp_teedev; mutex_init(&optee->ffa.mutex); mutex_init(&optee->call_queue.mutex); INIT_LIST_HEAD(&optee->call_queue.waiters); optee_supp_init(&optee->supp); ffa_dev_set_drvdata(ffa_dev, optee); + ctx = teedev_open(optee->teedev); + if (IS_ERR(ctx)) + goto err_rhashtable_free; + optee->ctx = ctx; rc = optee_notif_init(optee, OPTEE_DEFAULT_MAX_NOTIF_VALUE); - if (rc) { - optee_ffa_remove(ffa_dev); - return rc; - } + if (rc) + goto err_close_ctx; rc = optee_enumerate_devices(PTA_CMD_GET_DEVICES); - if (rc) { - optee_ffa_remove(ffa_dev); - return rc; - } + if (rc) + goto err_unregister_devices; pr_info("initialized driver\n"); return 0; -err: - /* - * tee_device_unregister() is safe to call even if the - * devices hasn't been registered with - * tee_device_register() yet. 
- */ + +err_unregister_devices: + optee_unregister_devices(); + optee_notif_uninit(optee); +err_close_ctx: + teedev_close_context(ctx); +err_rhashtable_free: + rhashtable_free_and_destroy(&optee->ffa.global_ids, rh_free_fn, NULL); + optee_supp_uninit(&optee->supp); + mutex_destroy(&optee->call_queue.mutex); +err_unreg_supp_teedev: tee_device_unregister(optee->supp_teedev); +err_unreg_teedev: tee_device_unregister(optee->teedev); - if (optee->pool) - tee_shm_pool_free(optee->pool); +err_free_pool: + tee_shm_pool_free(pool); +err_free_optee: kfree(optee); return rc; } diff --git a/drivers/tee/optee/notif.c b/drivers/tee/optee/notif.c index a28fa03dcd0e19c1b6e4e95fb7be4ccdc72ca7dc..05212842b0a50f5498f79a8c7f99d41937ce01ad 100644 --- a/drivers/tee/optee/notif.c +++ b/drivers/tee/optee/notif.c @@ -121,5 +121,5 @@ int optee_notif_init(struct optee *optee, u_int max_key) void optee_notif_uninit(struct optee *optee) { - kfree(optee->notif.bitmap); + bitmap_free(optee->notif.bitmap); } diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index 46f74ab07c7edf67154c2c8a27ca066a986da590..92bc47bef95ffcb65a85990dbb44e6af234da4a6 100644 --- a/drivers/tee/optee/optee_private.h +++ b/drivers/tee/optee/optee_private.h @@ -53,7 +53,6 @@ struct optee_call_queue { struct optee_notif { u_int max_key; - struct tee_context *ctx; /* Serializes access to the elements below in this struct */ spinlock_t lock; struct list_head db; @@ -134,9 +133,10 @@ struct optee_ops { /** * struct optee - main service struct * @supp_teedev: supplicant device + * @teedev: client device * @ops: internal callbacks for different ways to reach secure * world - * @teedev: client device + * @ctx: driver internal TEE context * @smc: specific to SMC ABI * @ffa: specific to FF-A ABI * @call_queue: queue of threads waiting to call @invoke_fn @@ -152,6 +152,7 @@ struct optee { struct tee_device *supp_teedev; struct tee_device *teedev; const struct optee_ops *ops; + struct tee_context *ctx; union { struct optee_smc smc; struct optee_ffa ffa; diff --git a/drivers/tee/optee/smc_abi.c b/drivers/tee/optee/smc_abi.c index 449d6a72d289b3ec126d29d6bc833138660d5eb6..1a55339c7072f77ad36cbfd4e7fdca83fb109a4f 100644 --- a/drivers/tee/optee/smc_abi.c +++ b/drivers/tee/optee/smc_abi.c @@ -75,16 +75,6 @@ static int from_msg_param_tmp_mem(struct tee_param *p, u32 attr, p->u.memref.shm_offs = mp->u.tmem.buf_ptr - pa; p->u.memref.shm = shm; - /* Check that the memref is covered by the shm object */ - if (p->u.memref.size) { - size_t o = p->u.memref.shm_offs + - p->u.memref.size - 1; - - rc = tee_shm_get_pa(shm, o, NULL); - if (rc) - return rc; - } - return 0; } @@ -622,6 +612,7 @@ static void handle_rpc_func_cmd_shm_free(struct tee_context *ctx, } static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx, + struct optee *optee, struct optee_msg_arg *arg, struct optee_call_ctx *call_ctx) { @@ -651,7 +642,8 @@ static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx, shm = optee_rpc_cmd_alloc_suppl(ctx, sz); break; case OPTEE_RPC_SHM_TYPE_KERNEL: - shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED | TEE_SHM_PRIV); + shm = tee_shm_alloc(optee->ctx, sz, + TEE_SHM_MAPPED | TEE_SHM_PRIV); break; default: arg->ret = TEEC_ERROR_BAD_PARAMETERS; @@ -747,7 +739,7 @@ static void handle_rpc_func_cmd(struct tee_context *ctx, struct optee *optee, switch (arg->cmd) { case OPTEE_RPC_CMD_SHM_ALLOC: free_pages_list(call_ctx); - handle_rpc_func_cmd_shm_alloc(ctx, arg, call_ctx); + handle_rpc_func_cmd_shm_alloc(ctx, optee, arg, call_ctx); 
break; case OPTEE_RPC_CMD_SHM_FREE: handle_rpc_func_cmd_shm_free(ctx, arg); @@ -776,7 +768,7 @@ static void optee_handle_rpc(struct tee_context *ctx, switch (OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)) { case OPTEE_SMC_RPC_FUNC_ALLOC: - shm = tee_shm_alloc(ctx, param->a1, + shm = tee_shm_alloc(optee->ctx, param->a1, TEE_SHM_MAPPED | TEE_SHM_PRIV); if (!IS_ERR(shm) && !tee_shm_get_pa(shm, 0, &pa)) { reg_pair_from_64(&param->a1, &param->a2, pa); @@ -954,57 +946,34 @@ static irqreturn_t notif_irq_thread_fn(int irq, void *dev_id) { struct optee *optee = dev_id; - optee_smc_do_bottom_half(optee->notif.ctx); + optee_smc_do_bottom_half(optee->ctx); return IRQ_HANDLED; } static int optee_smc_notif_init_irq(struct optee *optee, u_int irq) { - struct tee_context *ctx; int rc; - ctx = teedev_open(optee->teedev); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - optee->notif.ctx = ctx; rc = request_threaded_irq(irq, notif_irq_handler, notif_irq_thread_fn, 0, "optee_notification", optee); if (rc) - goto err_close_ctx; + return rc; optee->smc.notif_irq = irq; return 0; - -err_close_ctx: - teedev_close_context(optee->notif.ctx); - optee->notif.ctx = NULL; - - return rc; } static void optee_smc_notif_uninit_irq(struct optee *optee) { - if (optee->notif.ctx) { - optee_smc_stop_async_notif(optee->notif.ctx); + if (optee->smc.sec_caps & OPTEE_SMC_SEC_CAP_ASYNC_NOTIF) { + optee_smc_stop_async_notif(optee->ctx); if (optee->smc.notif_irq) { free_irq(optee->smc.notif_irq, optee); irq_dispose_mapping(optee->smc.notif_irq); } - - /* - * The thread normally working with optee->notif.ctx was - * stopped with free_irq() above. - * - * Note we're not using teedev_close_context() or - * tee_client_close_context() since we have already called - * tee_device_put() while initializing to avoid a circular - * reference counting.
- */ - teedev_close_context(optee->notif.ctx); } } @@ -1366,6 +1335,7 @@ static int optee_probe(struct platform_device *pdev) struct optee *optee = NULL; void *memremaped_shm = NULL; struct tee_device *teedev; + struct tee_context *ctx; u32 max_notif_value; u32 sec_caps; int rc; @@ -1446,9 +1416,13 @@ static int optee_probe(struct platform_device *pdev) optee->pool = pool; platform_set_drvdata(pdev, optee); + ctx = teedev_open(optee->teedev); + if (IS_ERR(ctx)) + goto err_supp_uninit; + optee->ctx = ctx; rc = optee_notif_init(optee, max_notif_value); if (rc) - goto err_supp_uninit; + goto err_close_ctx; if (sec_caps & OPTEE_SMC_SEC_CAP_ASYNC_NOTIF) { unsigned int irq; @@ -1496,6 +1470,8 @@ err_disable_shm_cache: optee_unregister_devices(); err_notif_uninit: optee_notif_uninit(optee); +err_close_ctx: + teedev_close_context(ctx); err_supp_uninit: optee_supp_uninit(&optee->supp); mutex_destroy(&optee->call_queue.mutex); diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 8502b7d8df8962d687beec14b302ad9e8f435858..72acb1f61849709f994e4c73610e4225a1a3a095 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -596,6 +596,7 @@ static const struct acpi_device_id int3400_thermal_match[] = { {"INT3400", 0}, {"INTC1040", 0}, {"INTC1041", 0}, + {"INTC10A0", 0}, {} }; diff --git a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c index c3c4c4d34542031e53534d64579d6f4bafd71851..07e25321dfe3bd911e0e2a99f4b008163e1f09e8 100644 --- a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c @@ -285,6 +285,7 @@ static const struct acpi_device_id int3403_device_ids[] = { {"INT3403", 0}, {"INTC1043", 0}, {"INTC1046", 0}, + {"INTC10A1", 0}, {"", 0}, }; MODULE_DEVICE_TABLE(acpi, int3403_device_ids); diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h index 9b2a64ef55d0292def8f0a4aecb12c4595c55b69..49932a68abac4df77e855490cf088f0de057a18d 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h @@ -24,6 +24,7 @@ #define PCI_DEVICE_ID_INTEL_HSB_THERMAL 0x0A03 #define PCI_DEVICE_ID_INTEL_ICL_THERMAL 0x8a03 #define PCI_DEVICE_ID_INTEL_JSL_THERMAL 0x4E03 +#define PCI_DEVICE_ID_INTEL_RPL_THERMAL 0xA71D #define PCI_DEVICE_ID_INTEL_SKL_THERMAL 0x1903 #define PCI_DEVICE_ID_INTEL_TGL_THERMAL 0x9A03 diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c index b4bcd3fe9eb2f9339a9b780f60cc0d1ad1028cfe..ca40b0967cdda92f5884dc000eeae1201965c983 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c @@ -358,6 +358,7 @@ static SIMPLE_DEV_PM_OPS(proc_thermal_pci_pm, proc_thermal_pci_suspend, static const struct pci_device_id proc_thermal_pci_ids[] = { { PCI_DEVICE_DATA(INTEL, ADL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_MBOX) }, + { PCI_DEVICE_DATA(INTEL, RPL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_MBOX) }, { }, }; diff --git 
a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index ba27b274c9674bc6410056804fed0fd4cf59b4f7..0b1808e3a912bf1f418385eccbcce1a55fbd72ba 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -322,6 +322,7 @@ static int addr_cnt; #define GSM1_ESCAPE_BITS 0x20 #define XON 0x11 #define XOFF 0x13 +#define ISO_IEC_646_MASK 0x7F static const struct tty_port_operations gsm_port_ops; @@ -531,7 +532,8 @@ static int gsm_stuff_frame(const u8 *input, u8 *output, int len) int olen = 0; while (len--) { if (*input == GSM1_SOF || *input == GSM1_ESCAPE - || *input == XON || *input == XOFF) { + || (*input & ISO_IEC_646_MASK) == XON + || (*input & ISO_IEC_646_MASK) == XOFF) { *output++ = GSM1_ESCAPE; *output++ = *input++ ^ GSM1_ESCAPE_BITS; olen++; diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index a38fd65e39ab4c662b46f21fb727d81271ab0350..5e988e5146531a82c9af01ec94654d4d6828a515 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1329,7 +1329,7 @@ handle_newline: put_tty_queue(c, ldata); smp_store_release(&ldata->canon_head, ldata->read_head); kill_fasync(&tty->fasync, SIGIO, POLL_IN); - wake_up_interruptible_poll(&tty->read_wait, EPOLLIN); + wake_up_interruptible_poll(&tty->read_wait, EPOLLIN | EPOLLRDNORM); return; } } @@ -1561,7 +1561,7 @@ static void __receive_buf(struct tty_struct *tty, const unsigned char *cp, if (read_cnt(ldata)) { kill_fasync(&tty->fasync, SIGIO, POLL_IN); - wake_up_interruptible_poll(&tty->read_wait, EPOLLIN); + wake_up_interruptible_poll(&tty->read_wait, EPOLLIN | EPOLLRDNORM); } } @@ -1938,7 +1938,7 @@ static bool canon_copy_from_read_buf(struct tty_struct *tty, more = n - (size - tail); if (eol == N_TTY_BUF_SIZE && more) { /* scan wrapped without finding set bit */ - eol = find_next_bit(ldata->read_flags, more, 0); + eol = find_first_bit(ldata->read_flags, more); found = eol != more; } else found = eol != size; diff --git a/drivers/tty/rpmsg_tty.c b/drivers/tty/rpmsg_tty.c index dae2a4e44f386d06f59b7313be74f44361b7c5c9..29db413bbc03085139b37eb32c992efc47cda2b2 100644 --- a/drivers/tty/rpmsg_tty.c +++ b/drivers/tty/rpmsg_tty.c @@ -50,10 +50,17 @@ static int rpmsg_tty_cb(struct rpmsg_device *rpdev, void *data, int len, void *p static int rpmsg_tty_install(struct tty_driver *driver, struct tty_struct *tty) { struct rpmsg_tty_port *cport = idr_find(&tty_idr, tty->index); + struct tty_port *port; tty->driver_data = cport; - return tty_port_install(&cport->port, driver, tty); + port = tty_port_get(&cport->port); + return tty_port_install(port, driver, tty); +} + +static void rpmsg_tty_cleanup(struct tty_struct *tty) +{ + tty_port_put(tty->port); } static int rpmsg_tty_open(struct tty_struct *tty, struct file *filp) @@ -106,12 +113,19 @@ static unsigned int rpmsg_tty_write_room(struct tty_struct *tty) return size; } +static void rpmsg_tty_hangup(struct tty_struct *tty) +{ + tty_port_hangup(tty->port); +} + static const struct tty_operations rpmsg_tty_ops = { .install = rpmsg_tty_install, .open = rpmsg_tty_open, .close = rpmsg_tty_close, .write = rpmsg_tty_write, .write_room = rpmsg_tty_write_room, + .hangup = rpmsg_tty_hangup, + .cleanup = rpmsg_tty_cleanup, }; static struct rpmsg_tty_port *rpmsg_tty_alloc_cport(void) @@ -137,8 +151,10 @@ static struct rpmsg_tty_port *rpmsg_tty_alloc_cport(void) return cport; } -static void rpmsg_tty_release_cport(struct rpmsg_tty_port *cport) +static void rpmsg_tty_destruct_port(struct tty_port *port) { + struct rpmsg_tty_port *cport = container_of(port, struct rpmsg_tty_port, port); + mutex_lock(&idr_lock); 
idr_remove(&tty_idr, cport->id); mutex_unlock(&idr_lock); @@ -146,7 +162,10 @@ static void rpmsg_tty_release_cport(struct rpmsg_tty_port *cport) kfree(cport); } -static const struct tty_port_operations rpmsg_tty_port_ops = { }; +static const struct tty_port_operations rpmsg_tty_port_ops = { + .destruct = rpmsg_tty_destruct_port, +}; + static int rpmsg_tty_probe(struct rpmsg_device *rpdev) { @@ -166,7 +185,8 @@ static int rpmsg_tty_probe(struct rpmsg_device *rpdev) cport->id, dev); if (IS_ERR(tty_dev)) { ret = dev_err_probe(dev, PTR_ERR(tty_dev), "Failed to register tty port\n"); - goto err_destroy; + tty_port_put(&cport->port); + return ret; } cport->rpdev = rpdev; @@ -177,12 +197,6 @@ static int rpmsg_tty_probe(struct rpmsg_device *rpdev) rpdev->src, rpdev->dst, cport->id); return 0; - -err_destroy: - tty_port_destroy(&cport->port); - rpmsg_tty_release_cport(cport); - - return ret; } static void rpmsg_tty_remove(struct rpmsg_device *rpdev) @@ -192,13 +206,11 @@ static void rpmsg_tty_remove(struct rpmsg_device *rpdev) dev_dbg(&rpdev->dev, "Removing rpmsg tty device %d\n", cport->id); /* User hang up to release the tty */ - if (tty_port_initialized(&cport->port)) - tty_port_tty_hangup(&cport->port, false); + tty_port_tty_hangup(&cport->port, false); tty_unregister_device(rpmsg_tty_driver, cport->id); - tty_port_destroy(&cport->port); - rpmsg_tty_release_cport(cport); + tty_port_put(&cport->port); } static struct rpmsg_device_id rpmsg_driver_tty_id_table[] = { diff --git a/drivers/tty/serial/8250/8250_gsc.c b/drivers/tty/serial/8250/8250_gsc.c index 673cda3d011d0c64c0c71a19dfa3052447d21c4f..948d0a1c6ae8edc91eefaadb5037f4a311d12509 100644 --- a/drivers/tty/serial/8250/8250_gsc.c +++ b/drivers/tty/serial/8250/8250_gsc.c @@ -26,7 +26,7 @@ static int __init serial_init_chip(struct parisc_device *dev) unsigned long address; int err; -#ifdef CONFIG_64BIT +#if defined(CONFIG_64BIT) && defined(CONFIG_IOSAPIC) if (!dev->irq && (dev->id.sversion == 0xad)) dev->irq = iosapic_serial_irq(dev); #endif diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index bce28729dd7bd9aacda70b75a68d9f8396dd544d..be8626234627e4a9f96246a027ab48a6603954af 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -83,8 +83,17 @@ static int of_platform_serial_setup(struct platform_device *ofdev, port->mapsize = resource_size(&resource); /* Check for shifted address mapping */ - if (of_property_read_u32(np, "reg-offset", &prop) == 0) + if (of_property_read_u32(np, "reg-offset", &prop) == 0) { + if (prop >= port->mapsize) { + dev_warn(&ofdev->dev, "reg-offset %u exceeds region size %pa\n", + prop, &port->mapsize); + ret = -EINVAL; + goto err_unprepare; + } + port->mapbase += prop; + port->mapsize -= prop; + } port->iotype = UPIO_MEM; if (of_property_read_u32(np, "reg-io-width", &prop) == 0) { diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index e8b5469e9dfa1267cba98d60fbad0dfee49bcfc6..e17e97ea86fadebf7b488c06e501697cee80952f 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -4779,8 +4779,30 @@ static const struct pci_device_id serial_pci_tbl[] = { { PCI_VENDOR_ID_INTASHIELD, PCI_DEVICE_ID_INTASHIELD_IS400, PCI_ANY_ID, PCI_ANY_ID, 0, 0, /* 135a.0dc0 */ pbn_b2_4_115200 }, + /* Brainboxes Devices */ /* - * BrainBoxes UC-260 + * Brainboxes UC-101 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0BA1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-235/246 + */ + { 
PCI_VENDOR_ID_INTASHIELD, 0x0AA1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_1_115200 }, + /* + * Brainboxes UC-257 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0861, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-260/271/701/756 */ { PCI_VENDOR_ID_INTASHIELD, 0x0D21, PCI_ANY_ID, PCI_ANY_ID, @@ -4788,7 +4810,81 @@ static const struct pci_device_id serial_pci_tbl[] = { pbn_b2_4_115200 }, { PCI_VENDOR_ID_INTASHIELD, 0x0E34, PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00, + PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-268 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0841, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-275/279 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0881, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_8_115200 }, + /* + * Brainboxes UC-302 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x08E1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-310 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x08C1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-313 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x08A3, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-320/324 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0A61, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_1_115200 }, + /* + * Brainboxes UC-346 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0B02, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-357 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0A81, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x0A83, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-368 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0C41, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-420/431 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0921, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, pbn_b2_4_115200 }, /* * Perle PCI-RAS cards diff --git a/drivers/tty/serial/8250/8250_pericom.c b/drivers/tty/serial/8250/8250_pericom.c index 025b055363c3db7268c24abdf245461e9d4e771f..95ff10f25d58aa5c7e5db838b76a766245849f90 100644 --- a/drivers/tty/serial/8250/8250_pericom.c +++ b/drivers/tty/serial/8250/8250_pericom.c @@ -117,7 +117,7 @@ static int pericom8250_probe(struct pci_dev *pdev, const struct pci_device_id *i uart.port.private_data = pericom; uart.port.iotype = UPIO_PORT; uart.port.uartclk = 921600 * 16; - uart.port.flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF | UPF_SHARE_IRQ | UPF_MAGIC_MULTIPLIER; + uart.port.flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF | UPF_SHARE_IRQ; uart.port.set_divisor = pericom_do_set_divisor; for (i = 0; i < nr && i < maxnr; i++) { unsigned int offset = (i == 3 && nr == 4) ? 
0x38 : i * 0x8; diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 2abb3de11a48b183c22808be30ae8597eea5e614..3b12bfc1ed67b1f4d3e9d1e92d65cbc234541567 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2056,7 +2056,10 @@ static void serial8250_break_ctl(struct uart_port *port, int break_state) serial8250_rpm_put(up); } -static void wait_for_lsr(struct uart_8250_port *up, int bits) +/* + * Wait for transmitter & holding register to empty + */ +static void wait_for_xmitr(struct uart_8250_port *up, int bits) { unsigned int status, tmout = 10000; @@ -2073,16 +2076,6 @@ static void wait_for_lsr(struct uart_8250_port *up, int bits) udelay(1); touch_nmi_watchdog(); } -} - -/* - * Wait for transmitter & holding register to empty - */ -static void wait_for_xmitr(struct uart_8250_port *up, int bits) -{ - unsigned int tmout; - - wait_for_lsr(up, bits); /* Wait up to 1s for flow control if necessary */ if (up->port.flags & UPF_CONS_FLOW) { @@ -3332,35 +3325,6 @@ static void serial8250_console_restore(struct uart_8250_port *up) serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS); } -/* - * Print a string to the serial port using the device FIFO - * - * It sends fifosize bytes and then waits for the fifo - * to get empty. - */ -static void serial8250_console_fifo_write(struct uart_8250_port *up, - const char *s, unsigned int count) -{ - int i; - const char *end = s + count; - unsigned int fifosize = up->port.fifosize; - bool cr_sent = false; - - while (s != end) { - wait_for_lsr(up, UART_LSR_THRE); - - for (i = 0; i < fifosize && s != end; ++i) { - if (*s == '\n' && !cr_sent) { - serial_out(up, UART_TX, '\r'); - cr_sent = true; - } else { - serial_out(up, UART_TX, *s++); - cr_sent = false; - } - } - } -} - /* * Print a string to the serial port trying not to disturb * any possible real use of the port... @@ -3376,7 +3340,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, struct uart_8250_em485 *em485 = up->em485; struct uart_port *port = &up->port; unsigned long flags; - unsigned int ier, use_fifo; + unsigned int ier; int locked = 1; touch_nmi_watchdog(); @@ -3408,20 +3372,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s, mdelay(port->rs485.delay_rts_before_send); } - use_fifo = (up->capabilities & UART_CAP_FIFO) && - port->fifosize > 1 && - (serial_port_in(port, UART_FCR) & UART_FCR_ENABLE_FIFO) && - /* - * After we put a data in the fifo, the controller will send - * it regardless of the CTS state. Therefore, only use fifo - * if we don't use control flow. 
- */ - !(up->port.flags & UPF_CONS_FLOW); - - if (likely(use_fifo)) - serial8250_console_fifo_write(up, s, count); - else - uart_console_write(port, s, count, serial8250_console_putchar); + uart_console_write(port, s, count, serial8250_console_putchar); /* * Finally, wait for transmitter to become empty diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 1f1df46242f98cf4ba96b51c75d9c3025a45a4d6..ba053a68529f771d81cd3c7cf73ce938859da50b 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -1582,9 +1582,6 @@ static void pl011_set_mctrl(struct uart_port *port, unsigned int mctrl) container_of(port, struct uart_amba_port, port); unsigned int cr; - if (port->rs485.flags & SER_RS485_ENABLED) - mctrl &= ~TIOCM_RTS; - cr = pl011_read(uap, REG_CR); #define TIOCMBIT(tiocmbit, uartbit) \ @@ -1808,14 +1805,8 @@ static int pl011_startup(struct uart_port *port) cr &= UART011_CR_RTS | UART011_CR_DTR; cr |= UART01x_CR_UARTEN | UART011_CR_RXE; - if (port->rs485.flags & SER_RS485_ENABLED) { - if (port->rs485.flags & SER_RS485_RTS_AFTER_SEND) - cr &= ~UART011_CR_RTS; - else - cr |= UART011_CR_RTS; - } else { + if (!(port->rs485.flags & SER_RS485_ENABLED)) cr |= UART011_CR_TXE; - } pl011_write(cr, uap, REG_CR); diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index dc40c4155356bfc9266b191212436ffadcd4d8a2..0db90be4c3bc3c738b49825fce06221fdcb840a8 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -144,6 +144,11 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear) unsigned long flags; unsigned int old; + if (port->rs485.flags & SER_RS485_ENABLED) { + set &= ~TIOCM_RTS; + clear &= ~TIOCM_RTS; + } + spin_lock_irqsave(&port->lock, flags); old = port->mctrl; port->mctrl = (old & ~clear) | set; @@ -157,23 +162,10 @@ uart_update_mctrl(struct uart_port *port, unsigned int set, unsigned int clear) static void uart_port_dtr_rts(struct uart_port *uport, int raise) { - int rs485_on = uport->rs485_config && - (uport->rs485.flags & SER_RS485_ENABLED); - int RTS_after_send = !!(uport->rs485.flags & SER_RS485_RTS_AFTER_SEND); - - if (raise) { - if (rs485_on && RTS_after_send) { - uart_set_mctrl(uport, TIOCM_DTR); - uart_clear_mctrl(uport, TIOCM_RTS); - } else { - uart_set_mctrl(uport, TIOCM_DTR | TIOCM_RTS); - } - } else { - unsigned int clear = TIOCM_DTR; - - clear |= (!rs485_on || RTS_after_send) ? 
TIOCM_RTS : 0; - uart_clear_mctrl(uport, clear); - } + if (raise) + uart_set_mctrl(uport, TIOCM_DTR | TIOCM_RTS); + else + uart_clear_mctrl(uport, TIOCM_DTR | TIOCM_RTS); } /* @@ -1075,11 +1067,6 @@ uart_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) goto out; if (!tty_io_error(tty)) { - if (uport->rs485.flags & SER_RS485_ENABLED) { - set &= ~TIOCM_RTS; - clear &= ~TIOCM_RTS; - } - uart_update_mctrl(uport, set, clear); ret = 0; } @@ -2390,6 +2377,9 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, */ spin_lock_irqsave(&port->lock, flags); port->mctrl &= TIOCM_DTR; + if (port->rs485.flags & SER_RS485_ENABLED && + !(port->rs485.flags & SER_RS485_RTS_AFTER_SEND)) + port->mctrl |= TIOCM_RTS; port->ops->set_mctrl(port, port->mctrl); spin_unlock_irqrestore(&port->lock, flags); diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 1f89ab0e49ac4d61e8c5bc7486c0d2c05bf9bbc3..9570002d07e751b27c23911a97b4682e0f47c945 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -550,11 +550,23 @@ static void stm32_usart_transmit_chars(struct uart_port *port) struct stm32_port *stm32_port = to_stm32_port(port); const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; struct circ_buf *xmit = &port->state->xmit; + u32 isr; + int ret; if (port->x_char) { if (stm32_usart_tx_dma_started(stm32_port) && stm32_usart_tx_dma_enabled(stm32_port)) stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DMAT); + + /* Check that TDR is empty before filling FIFO */ + ret = + readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr, + isr, + (isr & USART_SR_TXE), + 10, 1000); + if (ret) + dev_warn(port->dev, "1 character may be erased\n"); + writel_relaxed(port->x_char, port->membase + ofs->tdr); port->x_char = 0; port->icount.tx++; @@ -730,7 +742,7 @@ static void stm32_usart_start_tx(struct uart_port *port) struct serial_rs485 *rs485conf = &port->rs485; struct circ_buf *xmit = &port->state->xmit; - if (uart_circ_empty(xmit)) + if (uart_circ_empty(xmit) && !port->x_char) return; if (rs485conf->flags & SER_RS485_ENABLED) { diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 3639bb6dc372e288db75960253de4663a0dede06..58013698635f013d6ef2f1f138860ab8d82ca3db 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -599,8 +599,8 @@ static int vt_setactivate(struct vt_setactivate __user *sa) if (vsa.console == 0 || vsa.console > MAX_NR_CONSOLES) return -ENXIO; - vsa.console = array_index_nospec(vsa.console, MAX_NR_CONSOLES + 1); vsa.console--; + vsa.console = array_index_nospec(vsa.console, MAX_NR_CONSOLES); console_lock(); ret = vc_allocate(vsa.console); if (ret) { @@ -845,6 +845,7 @@ int vt_ioctl(struct tty_struct *tty, return -ENXIO; arg--; + arg = array_index_nospec(arg, MAX_NR_CONSOLES); console_lock(); ret = vc_allocate(arg); console_unlock(); diff --git a/drivers/usb/cdns3/drd.c b/drivers/usb/cdns3/drd.c index 55c73b1d870471bff520b619fa27b6a94ee752d8..d00ff98dffabfa9cc1fdcde47212b9a0af843b7e 100644 --- a/drivers/usb/cdns3/drd.c +++ b/drivers/usb/cdns3/drd.c @@ -483,11 +483,11 @@ int cdns_drd_exit(struct cdns *cdns) /* Indicate the cdns3 core was power lost before */ bool cdns_power_is_lost(struct cdns *cdns) { - if (cdns->version == CDNS3_CONTROLLER_V1) { - if (!(readl(&cdns->otg_v1_regs->simulate) & BIT(0))) + if (cdns->version == CDNS3_CONTROLLER_V0) { + if (!(readl(&cdns->otg_v0_regs->simulate) & BIT(0))) return true; } else { - if (!(readl(&cdns->otg_v0_regs->simulate) & 
BIT(0))) + if (!(readl(&cdns->otg_v1_regs->simulate) & BIT(0))) return true; } return false; diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index 4169cf40a03b59c79cf4d7520a9aec859c2b950e..5509d3847af4b9787b166e22bd09ac4f6e79b8d1 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -39,8 +39,11 @@ static int ulpi_match(struct device *dev, struct device_driver *driver) struct ulpi *ulpi = to_ulpi_dev(dev); const struct ulpi_device_id *id; - /* Some ULPI devices don't have a vendor id so rely on OF match */ - if (ulpi->id.vendor == 0) + /* + * Some ULPI devices don't have a vendor id + * or provide an id_table so rely on OF match. + */ + if (ulpi->id.vendor == 0 || !drv->id_table) return of_driver_match_device(dev, driver); for (id = drv->id_table; id->vendor; id++) @@ -127,6 +130,7 @@ static const struct attribute_group *ulpi_dev_attr_groups[] = { static void ulpi_dev_release(struct device *dev) { + of_node_put(dev->of_node); kfree(to_ulpi_dev(dev)); } @@ -244,12 +248,16 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi) return ret; ret = ulpi_read_id(ulpi); - if (ret) + if (ret) { + of_node_put(ulpi->dev.of_node); return ret; + } ret = device_register(&ulpi->dev); - if (ret) + if (ret) { + put_device(&ulpi->dev); return ret; + } dev_dbg(&ulpi->dev, "registered ULPI PHY: vendor %04x, product %04x\n", ulpi->id.vendor, ulpi->id.product); @@ -296,7 +304,6 @@ EXPORT_SYMBOL_GPL(ulpi_register_interface); */ void ulpi_unregister_interface(struct ulpi *ulpi) { - of_node_put(ulpi->dev.of_node); device_unregister(&ulpi->dev); } EXPORT_SYMBOL_GPL(ulpi_unregister_interface); diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 3e01dd6e509b607e8e63ac8a6740fe3235d351df..d9712c2602afe221e480392bd8eaad018d0daf83 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1563,6 +1563,13 @@ int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags) urb->hcpriv = NULL; INIT_LIST_HEAD(&urb->urb_list); atomic_dec(&urb->use_count); + /* + * Order the write of urb->use_count above before the read + * of urb->reject below. Pairs with the memory barriers in + * usb_kill_urb() and usb_poison_urb(). + */ + smp_mb__after_atomic(); + atomic_dec(&urb->dev->urbnum); if (atomic_read(&urb->reject)) wake_up(&usb_kill_urb_queue); @@ -1665,6 +1672,13 @@ static void __usb_hcd_giveback_urb(struct urb *urb) usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); + /* + * Order the write of urb->use_count above before the read + * of urb->reject below. Pairs with the memory barriers in + * usb_kill_urb() and usb_poison_urb(). 
+ */ + smp_mb__after_atomic(); + if (unlikely(atomic_read(&urb->reject))) wake_up(&usb_kill_urb_queue); usb_put_urb(urb); diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c index c2bbf97a79bec1ad7458024e9b8da6b844732b72..d5bc36ca5b1f7721f0968780659c3619808b5a6f 100644 --- a/drivers/usb/core/port.c +++ b/drivers/usb/core/port.c @@ -602,11 +602,14 @@ int usb_hub_create_port_device(struct usb_hub *hub, int port1) return retval; } - find_and_link_peer(hub, port1); - retval = component_add(&port_dev->dev, &connector_ops); - if (retval) + if (retval) { dev_warn(&port_dev->dev, "failed to add component\n"); + device_unregister(&port_dev->dev); + return retval; + } + + find_and_link_peer(hub, port1); /* * Enable runtime pm and hold a reference that hub_configure() diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index 30727729a44cc06adeb50745e227ab8f6dbeab29..33d62d7e3929f1396ab8ca201d8214446fc5c144 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -715,6 +715,12 @@ void usb_kill_urb(struct urb *urb) if (!(urb && urb->dev && urb->ep)) return; atomic_inc(&urb->reject); + /* + * Order the write of urb->reject above before the read + * of urb->use_count below. Pairs with the barriers in + * __usb_hcd_giveback_urb() and usb_hcd_submit_urb(). + */ + smp_mb__after_atomic(); usb_hcd_unlink_urb(urb, -ENOENT); wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0); @@ -756,6 +762,12 @@ void usb_poison_urb(struct urb *urb) if (!urb) return; atomic_inc(&urb->reject); + /* + * Order the write of urb->reject above before the read + * of urb->use_count below. Pairs with the barriers in + * __usb_hcd_giveback_urb() and usb_hcd_submit_urb(). + */ + smp_mb__after_atomic(); if (!urb->dev || !urb->ep) return; diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 2bc03f41c70adb8e7d2e4e07b0ed97a6b022b58c..eee3504397e6e56fcf4809e948698ff21f32954a 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -5097,7 +5097,7 @@ int dwc2_hsotg_suspend(struct dwc2_hsotg *hsotg) hsotg->gadget.speed = USB_SPEED_UNKNOWN; spin_unlock_irqrestore(&hsotg->lock, flags); - for (ep = 0; ep < hsotg->num_of_eps; ep++) { + for (ep = 1; ep < hsotg->num_of_eps; ep++) { if (hsotg->eps_in[ep]) dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep); if (hsotg->eps_out[ep]) diff --git a/drivers/usb/dwc3/dwc3-xilinx.c b/drivers/usb/dwc3/dwc3-xilinx.c index 9cc3ad701a295297d4d0c72c01bf4020444440cf..a6f3a9b38789e0acd871e52fa97f891d7e8cab84 100644 --- a/drivers/usb/dwc3/dwc3-xilinx.c +++ b/drivers/usb/dwc3/dwc3-xilinx.c @@ -99,17 +99,29 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data) struct device *dev = priv_data->dev; struct reset_control *crst, *hibrst, *apbrst; struct phy *usb3_phy; - int ret; + int ret = 0; u32 reg; - usb3_phy = devm_phy_get(dev, "usb3-phy"); - if (PTR_ERR(usb3_phy) == -EPROBE_DEFER) { - ret = -EPROBE_DEFER; + usb3_phy = devm_phy_optional_get(dev, "usb3-phy"); + if (IS_ERR(usb3_phy)) { + ret = PTR_ERR(usb3_phy); + dev_err_probe(dev, ret, + "failed to get USB3 PHY\n"); goto err; - } else if (IS_ERR(usb3_phy)) { - usb3_phy = NULL; } + /* + * The following core resets are not required unless a USB3 PHY + * is used, and the subsequent register settings are not required + * unless a core reset is performed (they should be set properly + * by the first-stage boot loader, but may be reverted by a core + * reset). They may also break the configuration if USB3 is actually + * in use but the usb3-phy entry is missing from the device tree.
+ * Therefore, skip these operations in this case. + */ + if (!usb3_phy) + goto skip_usb3_phy; + crst = devm_reset_control_get_exclusive(dev, "usb_crst"); if (IS_ERR(crst)) { ret = PTR_ERR(crst); @@ -188,6 +200,7 @@ static int dwc3_xlnx_init_zynqmp(struct dwc3_xlnx *priv_data) goto err; } +skip_usb3_phy: /* * This routes the USB DMA traffic to go through FPD path instead * of reaching DDR directly. This traffic routing is needed to diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 520031ba38aaed1cfc230eaef06b0fea6b4aab59..183b90923f51ba9223108471b77eb4a6dd32131e 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1291,6 +1291,19 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, if (usb_endpoint_xfer_bulk(dep->endpoint.desc) && dep->stream_capable) trb->ctrl |= DWC3_TRB_CTRL_SID_SOFN(stream_id); + /* + * As per the data book, section 4.2.3.2 (TRB Control Bit Rules): + * + * The controller autonomously checks the HWO field of a TRB to determine if the + * entire TRB is valid. Therefore, software must ensure that the rest of the TRB + * is valid before setting the HWO field to '1'. In most systems, this means that + * software must update the fourth DWORD of a TRB last. + * + * However, there is a possibility of CPU re-ordering here which can cause + * the controller to observe the HWO bit set prematurely. + * Add a write memory barrier to prevent CPU re-ordering. + */ + wmb(); trb->ctrl |= DWC3_TRB_CTRL_HWO; dwc3_ep_inc_enq(dep); diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 16f9e3423c9faaa59ddab3e8e62d36e49b2dd719..9315313108c9d50432ee80ba7b801b8a1e1c036b 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1988,6 +1988,9 @@ unknown: if (w_index != 0x5 || (w_value >> 8)) break; interface = w_value & 0xFF; + if (interface >= MAX_CONFIG_INTERFACES || + !os_desc_cfg->interface[interface]) + break; buf[6] = w_index; count = count_ext_prop(os_desc_cfg, interface); diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 25ad1e97a458589363eb67fe15b1688faa171f0e..1922fd02043c527678232a46835a9b9131da320b 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1711,16 +1711,24 @@ static void ffs_data_put(struct ffs_data *ffs) static void ffs_data_closed(struct ffs_data *ffs) { + struct ffs_epfile *epfiles; + unsigned long flags; + ENTER(); if (atomic_dec_and_test(&ffs->opened)) { if (ffs->no_disconnect) { ffs->state = FFS_DEACTIVATED; - if (ffs->epfiles) { - ffs_epfiles_destroy(ffs->epfiles, - ffs->eps_count); - ffs->epfiles = NULL; - } + spin_lock_irqsave(&ffs->eps_lock, flags); + epfiles = ffs->epfiles; + ffs->epfiles = NULL; + spin_unlock_irqrestore(&ffs->eps_lock, + flags); + + if (epfiles) + ffs_epfiles_destroy(epfiles, + ffs->eps_count); + if (ffs->setup_state == FFS_SETUP_PENDING) __ffs_ep0_stall(ffs); } else { @@ -1767,14 +1775,27 @@ static struct ffs_data *ffs_data_new(const char *dev_name) static void ffs_data_clear(struct ffs_data *ffs) { + struct ffs_epfile *epfiles; + unsigned long flags; + ENTER(); ffs_closed(ffs); BUG_ON(ffs->gadget); - if (ffs->epfiles) { - ffs_epfiles_destroy(ffs->epfiles, ffs->eps_count); + spin_lock_irqsave(&ffs->eps_lock, flags); + epfiles = ffs->epfiles; + ffs->epfiles = NULL; + spin_unlock_irqrestore(&ffs->eps_lock, flags); + + /* + * A potential race exists between ffs_func_eps_disable + * & ffs_epfile_release; therefore maintaining a local + * copy of epfile
will save us from use-after-free. + */ + if (epfiles) { + ffs_epfiles_destroy(epfiles, ffs->eps_count); ffs->epfiles = NULL; } @@ -1922,12 +1943,15 @@ static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count) static void ffs_func_eps_disable(struct ffs_function *func) { - struct ffs_ep *ep = func->eps; - struct ffs_epfile *epfile = func->ffs->epfiles; - unsigned count = func->ffs->eps_count; + struct ffs_ep *ep; + struct ffs_epfile *epfile; + unsigned short count; unsigned long flags; spin_lock_irqsave(&func->ffs->eps_lock, flags); + count = func->ffs->eps_count; + epfile = func->ffs->epfiles; + ep = func->eps; while (count--) { /* pending requests get nuked */ if (ep->ep) @@ -1945,14 +1969,18 @@ static void ffs_func_eps_disable(struct ffs_function *func) static int ffs_func_eps_enable(struct ffs_function *func) { - struct ffs_data *ffs = func->ffs; - struct ffs_ep *ep = func->eps; - struct ffs_epfile *epfile = ffs->epfiles; - unsigned count = ffs->eps_count; + struct ffs_data *ffs; + struct ffs_ep *ep; + struct ffs_epfile *epfile; + unsigned short count; unsigned long flags; int ret = 0; spin_lock_irqsave(&func->ffs->eps_lock, flags); + ffs = func->ffs; + ep = func->eps; + epfile = ffs->epfiles; + count = ffs->eps_count; while(count--) { ep->ep->driver_data = ep; diff --git a/drivers/usb/gadget/function/f_sourcesink.c b/drivers/usb/gadget/function/f_sourcesink.c index 1abf08e5164af9e9cba03adbe52be6aa6b5713fd..6803cd60cc6dc3147b1901c56cf28f9dda2da607 100644 --- a/drivers/usb/gadget/function/f_sourcesink.c +++ b/drivers/usb/gadget/function/f_sourcesink.c @@ -584,6 +584,7 @@ static int source_sink_start_ep(struct f_sourcesink *ss, bool is_in, if (is_iso) { switch (speed) { + case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: size = ss->isoc_maxpacket * (ss->isoc_mult + 1) * diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 36fa6ef0581b5694db7a6bb227951b3b72abf5f1..097a709549d608a15f6d0248ac8c05cad4a52c04 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -203,7 +203,7 @@ static struct uac2_input_terminal_descriptor io_in_it_desc = { .bDescriptorSubtype = UAC_INPUT_TERMINAL, /* .bTerminalID = DYNAMIC */ - .wTerminalType = cpu_to_le16(UAC_INPUT_TERMINAL_UNDEFINED), + .wTerminalType = cpu_to_le16(UAC_INPUT_TERMINAL_MICROPHONE), .bAssocTerminal = 0, /* .bCSourceID = DYNAMIC */ .iChannelNames = 0, @@ -231,7 +231,7 @@ static struct uac2_output_terminal_descriptor io_out_ot_desc = { .bDescriptorSubtype = UAC_OUTPUT_TERMINAL, /* .bTerminalID = DYNAMIC */ - .wTerminalType = cpu_to_le16(UAC_OUTPUT_TERMINAL_UNDEFINED), + .wTerminalType = cpu_to_le16(UAC_OUTPUT_TERMINAL_SPEAKER), .bAssocTerminal = 0, /* .bSourceID = DYNAMIC */ /* .bCSourceID = DYNAMIC */ diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c index 64de9f1b874c55a3fdb4dddb49ecbdc4eec0aa1b..b7ccf18036568a086621cdc0e2a40a1b24b094b3 100644 --- a/drivers/usb/gadget/function/rndis.c +++ b/drivers/usb/gadget/function/rndis.c @@ -637,14 +637,17 @@ static int rndis_set_response(struct rndis_params *params, rndis_set_cmplt_type *resp; rndis_resp_t *r; + BufLength = le32_to_cpu(buf->InformationBufferLength); + BufOffset = le32_to_cpu(buf->InformationBufferOffset); + if ((BufLength > RNDIS_MAX_TOTAL_SIZE) || + (BufOffset + 8 >= RNDIS_MAX_TOTAL_SIZE)) + return -EINVAL; + r = rndis_add_response(params, sizeof(rndis_set_cmplt_type)); if (!r) return -ENOMEM; resp = (rndis_set_cmplt_type *)r->buf; - BufLength = 
le32_to_cpu(buf->InformationBufferLength); - BufOffset = le32_to_cpu(buf->InformationBufferOffset); - #ifdef VERBOSE_DEBUG pr_debug("%s: Length: %d\n", __func__, BufLength); pr_debug("%s: Offset: %d\n", __func__, BufOffset); @@ -1117,7 +1120,7 @@ static int rndis_proc_show(struct seq_file *m, void *v) static ssize_t rndis_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - rndis_params *p = PDE_DATA(file_inode(file)); + rndis_params *p = pde_data(file_inode(file)); u32 speed = 0; int i, fl_speed = 0; @@ -1161,7 +1164,7 @@ static ssize_t rndis_proc_write(struct file *file, const char __user *buffer, static int rndis_proc_open(struct inode *inode, struct file *file) { - return single_open(file, rndis_proc_show, PDE_DATA(inode)); + return single_open(file, rndis_proc_show, pde_data(inode)); } static const struct proc_ops rndis_proc_ops = { diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index c5a2c734234a5cc0eeda1a223612089c346de535..d86c3a36441ee998f8a8a8f8813ad5478d786281 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -1004,7 +1004,7 @@ static int raw_process_ep_io(struct raw_dev *dev, struct usb_raw_ep_io *io, ret = -EBUSY; goto out_unlock; } - if ((in && !ep->ep->caps.dir_in) || (!in && ep->ep->caps.dir_in)) { + if (in != usb_endpoint_dir_in(ep->ep->desc)) { dev_dbg(&dev->gadget->dev, "fail, wrong direction\n"); ret = -EINVAL; goto out_unlock; diff --git a/drivers/usb/gadget/udc/at91_udc.c b/drivers/usb/gadget/udc/at91_udc.c index dd0819df096e1a87d9cb8cdacf2fe51cf13f9705..9040a056146656207645c41969896b4a88257bd6 100644 --- a/drivers/usb/gadget/udc/at91_udc.c +++ b/drivers/usb/gadget/udc/at91_udc.c @@ -1895,7 +1895,7 @@ static int at91udc_probe(struct platform_device *pdev) at91_vbus_irq, 0, driver_name, udc); if (retval) { DBG("request vbus irq %d failed\n", - udc->board.vbus_pin); + desc_to_gpio(udc->board.vbus_pin)); goto err_unprepare_iclk; } } diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 57d417a7c3e0a687e9fbc11b604b7bdb93aedc8f..601829a6b4badd7f0fd98805edcb0659f28f4a0f 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -2378,6 +2378,8 @@ static void handle_ext_role_switch_states(struct device *dev, switch (role) { case USB_ROLE_NONE: usb3->connection_state = USB_ROLE_NONE; + if (cur_role == USB_ROLE_HOST) + device_release_driver(host); if (usb3->driver) usb3_disconnect(usb3); usb3_vbus_out(usb3, false); diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index c1edcc9b13cece07c9dc0e00c27a7f0848f4875c..dc570ce4e8319ad09d6088e3dc2ee557ccba5c78 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -437,6 +437,9 @@ static int __maybe_unused xhci_plat_suspend(struct device *dev) struct xhci_hcd *xhci = hcd_to_xhci(hcd); int ret; + if (pm_runtime_suspended(dev)) + pm_runtime_resume(dev); + ret = xhci_priv_suspend_quirk(hcd); if (ret) return ret; diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c index 507deef1f709327d95ae5ed5836c1eb10c2620d3..04c4e3fed09460968c4e9af6ccb283b19a37bbaa 100644 --- a/drivers/usb/misc/usb251xb.c +++ b/drivers/usb/misc/usb251xb.c @@ -543,6 +543,9 @@ static int usb251xb_get_ofdata(struct usb251xb *hub, if (of_property_read_u16_array(np, "language-id", &hub->lang_id, 1)) hub->lang_id = USB251XB_DEF_LANGUAGE_ID; + if (of_property_read_u8(np, "boost-up", &hub->boost_up)) + 
hub->boost_up = USB251XB_DEF_BOOST_UP; + cproperty_char = of_get_property(np, "manufacturer", NULL); strlcpy(str, cproperty_char ? : USB251XB_DEF_MANUFACTURER_STRING, sizeof(str)); @@ -584,7 +587,6 @@ static int usb251xb_get_ofdata(struct usb251xb *hub, * may be as soon as needed. */ hub->bat_charge_en = USB251XB_DEF_BATTERY_CHARGING_ENABLE; - hub->boost_up = USB251XB_DEF_BOOST_UP; hub->boost_57 = USB251XB_DEF_BOOST_57; hub->boost_14 = USB251XB_DEF_BOOST_14; hub->port_map12 = USB251XB_DEF_PORT_MAP_12; diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 29f4b87a9e74362d4999eefafe0422b4eb48740c..58cba8ee0277ae7fe23021b96ed78211f2bb01ca 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -85,6 +85,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1a86, 0x5523) }, { USB_DEVICE(0x1a86, 0x7522) }, { USB_DEVICE(0x1a86, 0x7523) }, + { USB_DEVICE(0x2184, 0x0057) }, { USB_DEVICE(0x4348, 0x5523) }, { USB_DEVICE(0x9986, 0x7523) }, { }, diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 8a60c0d56863eef53e3d383e34fd100ac0c5c941..a27f7efcec6a8ff8f9ae6ed670f5c8754fd7e78f 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -51,6 +51,7 @@ static void cp210x_enable_event_mode(struct usb_serial_port *port); static void cp210x_disable_event_mode(struct usb_serial_port *port); static const struct usb_device_id id_table[] = { + { USB_DEVICE(0x0404, 0x034C) }, /* NCR Retail IO Box */ { USB_DEVICE(0x045B, 0x0053) }, /* Renesas RX610 RX-Stick */ { USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */ { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ @@ -68,6 +69,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0FCF, 0x1004) }, /* Dynastream ANT2USB */ { USB_DEVICE(0x0FCF, 0x1006) }, /* Dynastream ANT development board */ { USB_DEVICE(0x0FDE, 0xCA05) }, /* OWL Wireless Electricity Monitor CM-160 */ + { USB_DEVICE(0x106F, 0x0003) }, /* CPI / Money Controls Bulk Coin Recycler */ { USB_DEVICE(0x10A6, 0xAA26) }, /* Knock-off DCU-11 cable */ { USB_DEVICE(0x10AB, 0x10C5) }, /* Siemens MC60 Cable */ { USB_DEVICE(0x10B5, 0xAC70) }, /* Nokia CA-42 USB */ diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 4edebd14ef29a64b1bcd7536622c17e732bb4f90..49c08f07c96901193efcd960c5d9634c78d82924 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -969,6 +969,7 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_023_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_034_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_101_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_159_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_3_PID) }, @@ -977,12 +978,14 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_6_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_7_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_8_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_235_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_257_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_3_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_4_PID) }, { 
USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_313_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_320_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_324_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_2_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 755858ca20bacf7702efc2d42c68b135cbc984f3..d1a9564697a4becf6f32b52461eef0fe36c1bbc0 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1506,6 +1506,9 @@ #define BRAINBOXES_VX_023_PID 0x1003 /* VX-023 ExpressCard 1 Port RS422/485 */ #define BRAINBOXES_VX_034_PID 0x1004 /* VX-034 ExpressCard 2 Port RS422/485 */ #define BRAINBOXES_US_101_PID 0x1011 /* US-101 1xRS232 */ +#define BRAINBOXES_US_159_PID 0x1021 /* US-159 1xRS232 */ +#define BRAINBOXES_US_235_PID 0x1017 /* US-235 1xRS232 */ +#define BRAINBOXES_US_320_PID 0x1019 /* US-320 1xRS422/485 */ #define BRAINBOXES_US_324_PID 0x1013 /* US-324 1xRS422/485 1Mbaud */ #define BRAINBOXES_US_606_1_PID 0x2001 /* US-606 6 Port RS232 Serial Port 1 and 2 */ #define BRAINBOXES_US_606_2_PID 0x2002 /* US-606 6 Port RS232 Serial Port 3 and 4 */ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 42420bfc983c2d5edb4879a8814dcb34d76fe061..962e9943fc20ec8a1f6b57905f724587037acd26 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1649,6 +1649,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x1476, 0xff) }, /* GosunCn ZTE WeLink ME3630 (ECM/NCM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1481, 0xff, 0x00, 0x00) }, /* ZTE MF871A */ + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1485, 0xff, 0xff, 0xff), /* ZTE MF286D */ + .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1533, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1534, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1535, 0xff, 0xff, 0xff) }, diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 29191d33c0e3edf43c4a1be70adc03e902b3ce53..1a05e3dcfec8a10652ec437bc0d80ce38c401d50 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2301,6 +2301,16 @@ UNUSUAL_DEV( 0x2027, 0xa001, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init, US_FL_SCM_MULT_TARG ), +/* + * Reported by DocMAX + * and Thomas Weißschuh + */ +UNUSUAL_DEV( 0x2109, 0x0715, 0x9999, 0x9999, + "VIA Labs, Inc.", + "VL817 SATA Bridge", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + UNUSUAL_DEV( 0x2116, 0x0320, 0x0001, 0x0001, "ST", "2A", diff --git a/drivers/usb/typec/port-mapper.c b/drivers/usb/typec/port-mapper.c index 07d307418b470bfa259880a0fa67fd29985dea1c..a7d507802509f33aead9cffb53a74257d2bcd6c0 100644 --- a/drivers/usb/typec/port-mapper.c +++ b/drivers/usb/typec/port-mapper.c @@ -56,7 +56,12 @@ int typec_link_ports(struct typec_port *con) { struct each_port_arg arg = { .port = con, .match = NULL }; + if (!has_acpi_companion(&con->dev)) + return 0; + bus_for_each_dev(&acpi_bus_type, NULL, &arg, typec_port_match); + if (!arg.match) + return 0; /* * REVISIT: Now each connector can have only a single component master. 
@@ -74,5 +79,6 @@ int typec_link_ports(struct typec_port *con) void typec_unlink_ports(struct typec_port *con) { - component_master_del(&con->dev, &typec_aggregate_ops); + if (has_acpi_companion(&con->dev)) + component_master_del(&con->dev, &typec_aggregate_ops); } diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index 35a1307349a20698d5cb95452663d3ab7095101b..e07d26a3cd8e1d5d75bbdb50cd11243137439409 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -75,9 +75,25 @@ static int tcpci_write16(struct tcpci *tcpci, unsigned int reg, u16 val) static int tcpci_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc) { struct tcpci *tcpci = tcpc_to_tcpci(tcpc); + bool vconn_pres; + enum typec_cc_polarity polarity = TYPEC_POLARITY_CC1; unsigned int reg; int ret; + ret = regmap_read(tcpci->regmap, TCPC_POWER_STATUS, &reg); + if (ret < 0) + return ret; + + vconn_pres = !!(reg & TCPC_POWER_STATUS_VCONN_PRES); + if (vconn_pres) { + ret = regmap_read(tcpci->regmap, TCPC_TCPC_CTRL, &reg); + if (ret < 0) + return ret; + + if (reg & TCPC_TCPC_CTRL_ORIENTATION) + polarity = TYPEC_POLARITY_CC2; + } + switch (cc) { case TYPEC_CC_RA: reg = (TCPC_ROLE_CTRL_CC_RA << TCPC_ROLE_CTRL_CC1_SHIFT) | @@ -112,6 +128,16 @@ static int tcpci_set_cc(struct tcpc_dev *tcpc, enum typec_cc_status cc) break; } + if (vconn_pres) { + if (polarity == TYPEC_POLARITY_CC2) { + reg &= ~(TCPC_ROLE_CTRL_CC1_MASK << TCPC_ROLE_CTRL_CC1_SHIFT); + reg |= (TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC1_SHIFT); + } else { + reg &= ~(TCPC_ROLE_CTRL_CC2_MASK << TCPC_ROLE_CTRL_CC2_SHIFT); + reg |= (TCPC_ROLE_CTRL_CC_OPEN << TCPC_ROLE_CTRL_CC2_SHIFT); + } + } + ret = regmap_write(tcpci->regmap, TCPC_ROLE_CTRL, reg); if (ret < 0) return ret; diff --git a/drivers/usb/typec/tcpm/tcpci.h b/drivers/usb/typec/tcpm/tcpci.h index 2be7a77d400ef9da9f0c22b8d4076379d190f1a8..b2edd45f13c681a5a6a0a4b49f9c389e537f7f4f 100644 --- a/drivers/usb/typec/tcpm/tcpci.h +++ b/drivers/usb/typec/tcpm/tcpci.h @@ -98,6 +98,7 @@ #define TCPC_POWER_STATUS_SOURCING_VBUS BIT(4) #define TCPC_POWER_STATUS_VBUS_DET BIT(3) #define TCPC_POWER_STATUS_VBUS_PRES BIT(2) +#define TCPC_POWER_STATUS_VCONN_PRES BIT(1) #define TCPC_POWER_STATUS_SINKING_VBUS BIT(0) #define TCPC_FAULT_STATUS 0x1f diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 59d4fa2443f2b44449c53a8cb4ccd250e0664189..5fce795b69c7f44b63a7f9401b5293c205cd2d4e 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -5156,7 +5156,8 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port) case SNK_TRYWAIT_DEBOUNCE: break; case SNK_ATTACH_WAIT: - tcpm_set_state(port, SNK_UNATTACHED, 0); + case SNK_DEBOUNCED: + /* Do nothing, as TCPM is still waiting for vbus to reach VSAFE5V to connect */ break; case SNK_NEGOTIATE_CAPABILITIES: @@ -5263,6 +5264,10 @@ static void _tcpm_pd_vbus_vsafe0v(struct tcpm_port *port) case PR_SWAP_SNK_SRC_SOURCE_ON: /* Do nothing, vsafe0v is expected during transition */ break; + case SNK_ATTACH_WAIT: + case SNK_DEBOUNCED: + /* Do nothing, still waiting for VSAFE5V for connect */ + break; default: if (port->pwr_role == TYPEC_SINK && port->auto_vbus_discharge_enabled) tcpm_set_state(port, SNK_UNATTACHED, 0); diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index bff96d64dddffee6857125ad6040f5d67a873847..6db7c8ddd51cd0aac16d89b86e83963fe39af70b 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -325,7 +325,7 @@ static int
ucsi_ccg_init(struct ucsi_ccg *uc) if (status < 0) return status; - if (!data) + if (!(data & DEV_INT)) return 0; status = ccg_write(uc, CCGX_RAB_INTR_REG, &data, sizeof(data)); diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig index 840d9813b0bc693cc9696d515beb701bf2e80393..fcc46380e7c91409f649b1e73c46ba25e794adf3 100644 --- a/drivers/video/console/Kconfig +++ b/drivers/video/console/Kconfig @@ -78,6 +78,26 @@ config FRAMEBUFFER_CONSOLE help Low-level framebuffer-based console driver. +config FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION + bool "Enable legacy fbcon hardware acceleration code" + depends on FRAMEBUFFER_CONSOLE + default y if PARISC + default n + help + This option enables the fbcon (framebuffer text-based) hardware + acceleration for graphics drivers which were written for the fbdev + graphics interface. + + On modern, mainstream machines (like x86-64) or when using a modern + Linux distribution, those fbdev drivers usually aren't used, so + enabling this option has no effect; on such machines you will want + to disable it. + + If you compile this kernel for older machines which still require the + fbdev drivers, you may want to say Y. + + If unsure, say N. + config FRAMEBUFFER_CONSOLE_DETECT_PRIMARY bool "Map the console to the primary display device" depends on FRAMEBUFFER_CONSOLE diff --git a/drivers/video/fbdev/core/bitblit.c b/drivers/video/fbdev/core/bitblit.c index 01fae2c96965c786edbfea6c97de4a8e8b801101..f98e8f298bc19a0ae63a8d493ae2e316d8e2e38a 100644 --- a/drivers/video/fbdev/core/bitblit.c +++ b/drivers/video/fbdev/core/bitblit.c @@ -43,6 +43,21 @@ static void update_attr(u8 *dst, u8 *src, int attribute, } } +static void bit_bmove(struct vc_data *vc, struct fb_info *info, int sy, + int sx, int dy, int dx, int height, int width) +{ + struct fb_copyarea area; + + area.sx = sx * vc->vc_font.width; + area.sy = sy * vc->vc_font.height; + area.dx = dx * vc->vc_font.width; + area.dy = dy * vc->vc_font.height; + area.height = height * vc->vc_font.height; + area.width = width * vc->vc_font.width; + + info->fbops->fb_copyarea(info, &area); +} + static void bit_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx, int height, int width) { @@ -378,6 +393,7 @@ static int bit_update_start(struct fb_info *info) void fbcon_set_bitops(struct fbcon_ops *ops) { + ops->bmove = bit_bmove; ops->clear = bit_clear; ops->putcs = bit_putcs; ops->clear_margins = bit_clear_margins; diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 99ecd9a6d844ad9bfd68cfc0ffc8f73398ab558a..2fc1b80a26ad9f8cf400d4ed00502133f5406d3b 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -173,6 +173,8 @@ static void fbcon_putcs(struct vc_data *vc, const unsigned short *s, int count, int ypos, int xpos); static void fbcon_clear_margins(struct vc_data *vc, int bottom_only); static void fbcon_cursor(struct vc_data *vc, int mode); +static void fbcon_bmove(struct vc_data *vc, int sy, int sx, int dy, int dx, + int height, int width); static int fbcon_switch(struct vc_data *vc); static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch); static void fbcon_set_palette(struct vc_data *vc, const unsigned char *table); @@ -180,8 +182,16 @@ static void fbcon_set_palette(struct vc_data *vc, const unsigned char *table); /* * Internal routines */ +static __inline__ void ywrap_up(struct vc_data *vc, int count); +static __inline__ void ywrap_down(struct vc_data *vc, int
count); +static __inline__ void ypan_up(struct vc_data *vc, int count); +static __inline__ void ypan_down(struct vc_data *vc, int count); +static void fbcon_bmove_rec(struct vc_data *vc, struct fbcon_display *p, int sy, int sx, + int dy, int dx, int height, int width, u_int y_break); static void fbcon_set_disp(struct fb_info *info, struct fb_var_screeninfo *var, int unit); +static void fbcon_redraw_move(struct vc_data *vc, struct fbcon_display *p, + int line, int count, int dy); static void fbcon_modechanged(struct fb_info *info); static void fbcon_set_all_vcs(struct fb_info *info); static void fbcon_start(void); @@ -1125,6 +1135,14 @@ static void fbcon_init(struct vc_data *vc, int init) ops->graphics = 0; +#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION + if ((info->flags & FBINFO_HWACCEL_COPYAREA) && + !(info->flags & FBINFO_HWACCEL_DISABLED)) + p->scrollmode = SCROLL_MOVE; + else /* default to something safe */ + p->scrollmode = SCROLL_REDRAW; +#endif + /* * ++guenther: console.c:vc_allocate() relies on initializing * vc_{cols,rows}, but we must not set those if we are only @@ -1211,13 +1229,14 @@ finished: * This system is now divided into two levels because of complications * caused by hardware scrolling. Top level functions: * - * fbcon_clear(), fbcon_putc(), fbcon_clear_margins() + * fbcon_bmove(), fbcon_clear(), fbcon_putc(), fbcon_clear_margins() * * handles y values in range [0, scr_height-1] that correspond to real * screen positions. y_wrap shift means that first line of bitmap may be * anywhere on this display. These functions convert lineoffsets to * bitmap offsets and deal with the wrap-around case by splitting blits. * + * fbcon_bmove_physical_8() -- These functions fast implementations * fbcon_clear_physical_8() -- of original fbcon_XXX fns. 
* fbcon_putc_physical_8() -- (font width != 8) may be added later * @@ -1390,6 +1409,224 @@ static void fbcon_set_disp(struct fb_info *info, struct fb_var_screeninfo *var, } } +static __inline__ void ywrap_up(struct vc_data *vc, int count) +{ + struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; + struct fbcon_ops *ops = info->fbcon_par; + struct fbcon_display *p = &fb_display[vc->vc_num]; + + p->yscroll += count; + if (p->yscroll >= p->vrows) /* Deal with wrap */ + p->yscroll -= p->vrows; + ops->var.xoffset = 0; + ops->var.yoffset = p->yscroll * vc->vc_font.height; + ops->var.vmode |= FB_VMODE_YWRAP; + ops->update_start(info); + scrollback_max += count; + if (scrollback_max > scrollback_phys_max) + scrollback_max = scrollback_phys_max; + scrollback_current = 0; +} + +static __inline__ void ywrap_down(struct vc_data *vc, int count) +{ + struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; + struct fbcon_ops *ops = info->fbcon_par; + struct fbcon_display *p = &fb_display[vc->vc_num]; + + p->yscroll -= count; + if (p->yscroll < 0) /* Deal with wrap */ + p->yscroll += p->vrows; + ops->var.xoffset = 0; + ops->var.yoffset = p->yscroll * vc->vc_font.height; + ops->var.vmode |= FB_VMODE_YWRAP; + ops->update_start(info); + scrollback_max -= count; + if (scrollback_max < 0) + scrollback_max = 0; + scrollback_current = 0; +} + +static __inline__ void ypan_up(struct vc_data *vc, int count) +{ + struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; + struct fbcon_display *p = &fb_display[vc->vc_num]; + struct fbcon_ops *ops = info->fbcon_par; + + p->yscroll += count; + if (p->yscroll > p->vrows - vc->vc_rows) { + ops->bmove(vc, info, p->vrows - vc->vc_rows, + 0, 0, 0, vc->vc_rows, vc->vc_cols); + p->yscroll -= p->vrows - vc->vc_rows; + } + + ops->var.xoffset = 0; + ops->var.yoffset = p->yscroll * vc->vc_font.height; + ops->var.vmode &= ~FB_VMODE_YWRAP; + ops->update_start(info); + fbcon_clear_margins(vc, 1); + scrollback_max += count; + if (scrollback_max > scrollback_phys_max) + scrollback_max = scrollback_phys_max; + scrollback_current = 0; +} + +static __inline__ void ypan_up_redraw(struct vc_data *vc, int t, int count) +{ + struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; + struct fbcon_ops *ops = info->fbcon_par; + struct fbcon_display *p = &fb_display[vc->vc_num]; + + p->yscroll += count; + + if (p->yscroll > p->vrows - vc->vc_rows) { + p->yscroll -= p->vrows - vc->vc_rows; + fbcon_redraw_move(vc, p, t + count, vc->vc_rows - count, t); + } + + ops->var.xoffset = 0; + ops->var.yoffset = p->yscroll * vc->vc_font.height; + ops->var.vmode &= ~FB_VMODE_YWRAP; + ops->update_start(info); + fbcon_clear_margins(vc, 1); + scrollback_max += count; + if (scrollback_max > scrollback_phys_max) + scrollback_max = scrollback_phys_max; + scrollback_current = 0; +} + +static __inline__ void ypan_down(struct vc_data *vc, int count) +{ + struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; + struct fbcon_display *p = &fb_display[vc->vc_num]; + struct fbcon_ops *ops = info->fbcon_par; + + p->yscroll -= count; + if (p->yscroll < 0) { + ops->bmove(vc, info, 0, 0, p->vrows - vc->vc_rows, + 0, vc->vc_rows, vc->vc_cols); + p->yscroll += p->vrows - vc->vc_rows; + } + + ops->var.xoffset = 0; + ops->var.yoffset = p->yscroll * vc->vc_font.height; + ops->var.vmode &= ~FB_VMODE_YWRAP; + ops->update_start(info); + fbcon_clear_margins(vc, 1); + scrollback_max -= count; + if (scrollback_max < 0) + scrollback_max = 0; + scrollback_current = 0; +} + +static __inline__ void 
ypan_down_redraw(struct vc_data *vc, int t, int count) +{ + struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; + struct fbcon_ops *ops = info->fbcon_par; + struct fbcon_display *p = &fb_display[vc->vc_num]; + + p->yscroll -= count; + + if (p->yscroll < 0) { + p->yscroll += p->vrows - vc->vc_rows; + fbcon_redraw_move(vc, p, t, vc->vc_rows - count, t + count); + } + + ops->var.xoffset = 0; + ops->var.yoffset = p->yscroll * vc->vc_font.height; + ops->var.vmode &= ~FB_VMODE_YWRAP; + ops->update_start(info); + fbcon_clear_margins(vc, 1); + scrollback_max -= count; + if (scrollback_max < 0) + scrollback_max = 0; + scrollback_current = 0; +} + +static void fbcon_redraw_move(struct vc_data *vc, struct fbcon_display *p, + int line, int count, int dy) +{ + unsigned short *s = (unsigned short *) + (vc->vc_origin + vc->vc_size_row * line); + + while (count--) { + unsigned short *start = s; + unsigned short *le = advance_row(s, 1); + unsigned short c; + int x = 0; + unsigned short attr = 1; + + do { + c = scr_readw(s); + if (attr != (c & 0xff00)) { + attr = c & 0xff00; + if (s > start) { + fbcon_putcs(vc, start, s - start, + dy, x); + x += s - start; + start = s; + } + } + console_conditional_schedule(); + s++; + } while (s < le); + if (s > start) + fbcon_putcs(vc, start, s - start, dy, x); + console_conditional_schedule(); + dy++; + } +} + +static void fbcon_redraw_blit(struct vc_data *vc, struct fb_info *info, + struct fbcon_display *p, int line, int count, int ycount) +{ + int offset = ycount * vc->vc_cols; + unsigned short *d = (unsigned short *) + (vc->vc_origin + vc->vc_size_row * line); + unsigned short *s = d + offset; + struct fbcon_ops *ops = info->fbcon_par; + + while (count--) { + unsigned short *start = s; + unsigned short *le = advance_row(s, 1); + unsigned short c; + int x = 0; + + do { + c = scr_readw(s); + + if (c == scr_readw(d)) { + if (s > start) { + ops->bmove(vc, info, line + ycount, x, + line, x, 1, s-start); + x += s - start + 1; + start = s + 1; + } else { + x++; + start++; + } + } + + scr_writew(c, d); + console_conditional_schedule(); + s++; + d++; + } while (s < le); + if (s > start) + ops->bmove(vc, info, line + ycount, x, line, x, 1, + s-start); + console_conditional_schedule(); + if (ycount > 0) + line++; + else { + line--; + /* NOTE: We subtract two lines from these pointers */ + s -= vc->vc_size_row; + d -= vc->vc_size_row; + } + } +} + static void fbcon_redraw(struct vc_data *vc, struct fbcon_display *p, int line, int count, int offset) { @@ -1450,6 +1687,7 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b, { struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; struct fbcon_display *p = &fb_display[vc->vc_num]; + int scroll_partial = info->flags & FBINFO_PARTIAL_PAN_OK; if (fbcon_is_inactive(vc, info)) return true; @@ -1466,32 +1704,291 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b, case SM_UP: if (count > vc->vc_rows) /* Maximum realistic size */ count = vc->vc_rows; - fbcon_redraw(vc, p, t, b - t - count, - count * vc->vc_cols); - fbcon_clear(vc, b - count, 0, count, vc->vc_cols); - scr_memsetw((unsigned short *) (vc->vc_origin + - vc->vc_size_row * - (b - count)), - vc->vc_video_erase_char, - vc->vc_size_row * count); - return true; + if (logo_shown >= 0) + goto redraw_up; + switch (fb_scrollmode(p)) { + case SCROLL_MOVE: + fbcon_redraw_blit(vc, info, p, t, b - t - count, + count); + fbcon_clear(vc, b - count, 0, count, vc->vc_cols); + scr_memsetw((unsigned short *) (vc->vc_origin + + 
vc->vc_size_row * + (b - count)), + vc->vc_video_erase_char, + vc->vc_size_row * count); + return true; + + case SCROLL_WRAP_MOVE: + if (b - t - count > 3 * vc->vc_rows >> 2) { + if (t > 0) + fbcon_bmove(vc, 0, 0, count, 0, t, + vc->vc_cols); + ywrap_up(vc, count); + if (vc->vc_rows - b > 0) + fbcon_bmove(vc, b - count, 0, b, 0, + vc->vc_rows - b, + vc->vc_cols); + } else if (info->flags & FBINFO_READS_FAST) + fbcon_bmove(vc, t + count, 0, t, 0, + b - t - count, vc->vc_cols); + else + goto redraw_up; + fbcon_clear(vc, b - count, 0, count, vc->vc_cols); + break; + + case SCROLL_PAN_REDRAW: + if ((p->yscroll + count <= + 2 * (p->vrows - vc->vc_rows)) + && ((!scroll_partial && (b - t == vc->vc_rows)) + || (scroll_partial + && (b - t - count > + 3 * vc->vc_rows >> 2)))) { + if (t > 0) + fbcon_redraw_move(vc, p, 0, t, count); + ypan_up_redraw(vc, t, count); + if (vc->vc_rows - b > 0) + fbcon_redraw_move(vc, p, b, + vc->vc_rows - b, b); + } else + fbcon_redraw_move(vc, p, t + count, b - t - count, t); + fbcon_clear(vc, b - count, 0, count, vc->vc_cols); + break; + + case SCROLL_PAN_MOVE: + if ((p->yscroll + count <= + 2 * (p->vrows - vc->vc_rows)) + && ((!scroll_partial && (b - t == vc->vc_rows)) + || (scroll_partial + && (b - t - count > + 3 * vc->vc_rows >> 2)))) { + if (t > 0) + fbcon_bmove(vc, 0, 0, count, 0, t, + vc->vc_cols); + ypan_up(vc, count); + if (vc->vc_rows - b > 0) + fbcon_bmove(vc, b - count, 0, b, 0, + vc->vc_rows - b, + vc->vc_cols); + } else if (info->flags & FBINFO_READS_FAST) + fbcon_bmove(vc, t + count, 0, t, 0, + b - t - count, vc->vc_cols); + else + goto redraw_up; + fbcon_clear(vc, b - count, 0, count, vc->vc_cols); + break; + + case SCROLL_REDRAW: + redraw_up: + fbcon_redraw(vc, p, t, b - t - count, + count * vc->vc_cols); + fbcon_clear(vc, b - count, 0, count, vc->vc_cols); + scr_memsetw((unsigned short *) (vc->vc_origin + + vc->vc_size_row * + (b - count)), + vc->vc_video_erase_char, + vc->vc_size_row * count); + return true; + } + break; case SM_DOWN: if (count > vc->vc_rows) /* Maximum realistic size */ count = vc->vc_rows; - fbcon_redraw(vc, p, b - 1, b - t - count, - -count * vc->vc_cols); - fbcon_clear(vc, t, 0, count, vc->vc_cols); - scr_memsetw((unsigned short *) (vc->vc_origin + - vc->vc_size_row * - t), - vc->vc_video_erase_char, - vc->vc_size_row * count); - return true; + if (logo_shown >= 0) + goto redraw_down; + switch (fb_scrollmode(p)) { + case SCROLL_MOVE: + fbcon_redraw_blit(vc, info, p, b - 1, b - t - count, + -count); + fbcon_clear(vc, t, 0, count, vc->vc_cols); + scr_memsetw((unsigned short *) (vc->vc_origin + + vc->vc_size_row * + t), + vc->vc_video_erase_char, + vc->vc_size_row * count); + return true; + + case SCROLL_WRAP_MOVE: + if (b - t - count > 3 * vc->vc_rows >> 2) { + if (vc->vc_rows - b > 0) + fbcon_bmove(vc, b, 0, b - count, 0, + vc->vc_rows - b, + vc->vc_cols); + ywrap_down(vc, count); + if (t > 0) + fbcon_bmove(vc, count, 0, 0, 0, t, + vc->vc_cols); + } else if (info->flags & FBINFO_READS_FAST) + fbcon_bmove(vc, t, 0, t + count, 0, + b - t - count, vc->vc_cols); + else + goto redraw_down; + fbcon_clear(vc, t, 0, count, vc->vc_cols); + break; + + case SCROLL_PAN_MOVE: + if ((count - p->yscroll <= p->vrows - vc->vc_rows) + && ((!scroll_partial && (b - t == vc->vc_rows)) + || (scroll_partial + && (b - t - count > + 3 * vc->vc_rows >> 2)))) { + if (vc->vc_rows - b > 0) + fbcon_bmove(vc, b, 0, b - count, 0, + vc->vc_rows - b, + vc->vc_cols); + ypan_down(vc, count); + if (t > 0) + fbcon_bmove(vc, count, 0, 0, 0, t, + vc->vc_cols); + } else 
if (info->flags & FBINFO_READS_FAST) + fbcon_bmove(vc, t, 0, t + count, 0, + b - t - count, vc->vc_cols); + else + goto redraw_down; + fbcon_clear(vc, t, 0, count, vc->vc_cols); + break; + + case SCROLL_PAN_REDRAW: + if ((count - p->yscroll <= p->vrows - vc->vc_rows) + && ((!scroll_partial && (b - t == vc->vc_rows)) + || (scroll_partial + && (b - t - count > + 3 * vc->vc_rows >> 2)))) { + if (vc->vc_rows - b > 0) + fbcon_redraw_move(vc, p, b, vc->vc_rows - b, + b - count); + ypan_down_redraw(vc, t, count); + if (t > 0) + fbcon_redraw_move(vc, p, count, t, 0); + } else + fbcon_redraw_move(vc, p, t, b - t - count, t + count); + fbcon_clear(vc, t, 0, count, vc->vc_cols); + break; + + case SCROLL_REDRAW: + redraw_down: + fbcon_redraw(vc, p, b - 1, b - t - count, + -count * vc->vc_cols); + fbcon_clear(vc, t, 0, count, vc->vc_cols); + scr_memsetw((unsigned short *) (vc->vc_origin + + vc->vc_size_row * + t), + vc->vc_video_erase_char, + vc->vc_size_row * count); + return true; + } } return false; } + +static void fbcon_bmove(struct vc_data *vc, int sy, int sx, int dy, int dx, + int height, int width) +{ + struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; + struct fbcon_display *p = &fb_display[vc->vc_num]; + + if (fbcon_is_inactive(vc, info)) + return; + + if (!width || !height) + return; + + /* Split blits that cross physical y_wrap case. + * Pathological case involves 4 blits, better to use recursive + * code rather than unrolled case + * + * Recursive invocations don't need to erase the cursor over and + * over again, so we use fbcon_bmove_rec() + */ + fbcon_bmove_rec(vc, p, sy, sx, dy, dx, height, width, + p->vrows - p->yscroll); +} + +static void fbcon_bmove_rec(struct vc_data *vc, struct fbcon_display *p, int sy, int sx, + int dy, int dx, int height, int width, u_int y_break) +{ + struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; + struct fbcon_ops *ops = info->fbcon_par; + u_int b; + + if (sy < y_break && sy + height > y_break) { + b = y_break - sy; + if (dy < sy) { /* Avoid trashing self */ + fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width, + y_break); + fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx, + height - b, width, y_break); + } else { + fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx, + height - b, width, y_break); + fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width, + y_break); + } + return; + } + + if (dy < y_break && dy + height > y_break) { + b = y_break - dy; + if (dy < sy) { /* Avoid trashing self */ + fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width, + y_break); + fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx, + height - b, width, y_break); + } else { + fbcon_bmove_rec(vc, p, sy + b, sx, dy + b, dx, + height - b, width, y_break); + fbcon_bmove_rec(vc, p, sy, sx, dy, dx, b, width, + y_break); + } + return; + } + ops->bmove(vc, info, real_y(p, sy), sx, real_y(p, dy), dx, + height, width); +} + +static void updatescrollmode_accel(struct fbcon_display *p, + struct fb_info *info, + struct vc_data *vc) +{ +#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION + struct fbcon_ops *ops = info->fbcon_par; + int cap = info->flags; + u16 t = 0; + int ypan = FBCON_SWAP(ops->rotate, info->fix.ypanstep, + info->fix.xpanstep); + int ywrap = FBCON_SWAP(ops->rotate, info->fix.ywrapstep, t); + int yres = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres); + int vyres = FBCON_SWAP(ops->rotate, info->var.yres_virtual, + info->var.xres_virtual); + int good_pan = (cap & FBINFO_HWACCEL_YPAN) && + divides(ypan, vc->vc_font.height) && vyres > yres; + int good_wrap = (cap & 
FBINFO_HWACCEL_YWRAP) && + divides(ywrap, vc->vc_font.height) && + divides(vc->vc_font.height, vyres) && + divides(vc->vc_font.height, yres); + int reading_fast = cap & FBINFO_READS_FAST; + int fast_copyarea = (cap & FBINFO_HWACCEL_COPYAREA) && + !(cap & FBINFO_HWACCEL_DISABLED); + int fast_imageblit = (cap & FBINFO_HWACCEL_IMAGEBLIT) && + !(cap & FBINFO_HWACCEL_DISABLED); + + if (good_wrap || good_pan) { + if (reading_fast || fast_copyarea) + p->scrollmode = good_wrap ? + SCROLL_WRAP_MOVE : SCROLL_PAN_MOVE; + else + p->scrollmode = good_wrap ? SCROLL_REDRAW : + SCROLL_PAN_REDRAW; + } else { + if (reading_fast || (fast_copyarea && !fast_imageblit)) + p->scrollmode = SCROLL_MOVE; + else + p->scrollmode = SCROLL_REDRAW; + } +#endif +} + static void updatescrollmode(struct fbcon_display *p, struct fb_info *info, struct vc_data *vc) @@ -1507,6 +2004,9 @@ static void updatescrollmode(struct fbcon_display *p, p->vrows -= (yres - (fh * vc->vc_rows)) / fh; if ((yres % fh) && (vyres % fh < yres % fh)) p->vrows--; + + /* update scrollmode in case hardware acceleration is used */ + updatescrollmode_accel(p, info, vc); } #define PITCH(w) (((w) + 7) >> 3) @@ -1664,7 +2164,21 @@ static int fbcon_switch(struct vc_data *vc) updatescrollmode(p, info, vc); - scrollback_phys_max = 0; + switch (fb_scrollmode(p)) { + case SCROLL_WRAP_MOVE: + scrollback_phys_max = p->vrows - vc->vc_rows; + break; + case SCROLL_PAN_MOVE: + case SCROLL_PAN_REDRAW: + scrollback_phys_max = p->vrows - 2 * vc->vc_rows; + if (scrollback_phys_max < 0) + scrollback_phys_max = 0; + break; + default: + scrollback_phys_max = 0; + break; + } + scrollback_max = 0; scrollback_current = 0; diff --git a/drivers/video/fbdev/core/fbcon.h b/drivers/video/fbdev/core/fbcon.h index a00603b4451ae7b5dd52d41604a17d42437ac343..969d41ecede5fe1e61b3a6dd0472a55c41afb358 100644 --- a/drivers/video/fbdev/core/fbcon.h +++ b/drivers/video/fbdev/core/fbcon.h @@ -29,6 +29,9 @@ struct fbcon_display { /* Filled in by the low-level console driver */ const u_char *fontdata; int userfont; /* != 0 if fontdata kmalloc()ed */ +#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION + u_short scrollmode; /* Scroll Method, use fb_scrollmode() */ +#endif u_short inverse; /* != 0 text black on white as default */ short yscroll; /* Hardware scrolling */ int vrows; /* number of virtual rows */ @@ -51,6 +54,8 @@ struct fbcon_display { }; struct fbcon_ops { + void (*bmove)(struct vc_data *vc, struct fb_info *info, int sy, + int sx, int dy, int dx, int height, int width); void (*clear)(struct vc_data *vc, struct fb_info *info, int sy, int sx, int height, int width); void (*putcs)(struct vc_data *vc, struct fb_info *info, @@ -149,6 +154,73 @@ static inline int attr_col_ec(int shift, struct vc_data *vc, #define attr_bgcol_ec(bgshift, vc, info) attr_col_ec(bgshift, vc, info, 0) #define attr_fgcol_ec(fgshift, vc, info) attr_col_ec(fgshift, vc, info, 1) + /* + * Scroll Method + */ + +/* There are several methods fbcon can use to move text around the screen: + * + * Operation Pan Wrap + *--------------------------------------------- + * SCROLL_MOVE copyarea No No + * SCROLL_PAN_MOVE copyarea Yes No + * SCROLL_WRAP_MOVE copyarea No Yes + * SCROLL_REDRAW imageblit No No + * SCROLL_PAN_REDRAW imageblit Yes No + * SCROLL_WRAP_REDRAW imageblit No Yes + * + * (SCROLL_WRAP_REDRAW is not implemented yet) + * + * In general, fbcon will choose the best scrolling + * method based on the rule below: + * + * Pan/Wrap > accel imageblit > accel copyarea > + * soft imageblit > (soft copyarea) + * + * 
Exception to the rule: Pan + accel copyarea is + * preferred over Pan + accel imageblit. + * + * The above is typical for PCI/AGP cards. Unless + * overridden, fbcon will never use soft copyarea. + * + * If you need to override the above rule, set the + * appropriate flags in fb_info->flags. For example, + * to prefer copyarea over imageblit, set + * FBINFO_READS_FAST. + * + * Other notes: + * + use the hardware engine to move the text + * (hw-accelerated copyarea() and fillrect()) + * + use hardware-supported panning on a large virtual screen + * + amifb can not only pan, but also wrap the display by N lines + * (i.e. visible line i = physical line (i+N) % yres). + * + read what's already rendered on the screen and + * write it in a different place (this is cfb_copyarea()) + * + re-render the text to the screen + * + * Whether to use wrapping or panning can only be figured out at + * runtime (when we know whether our font height is a multiple + * of the pan/wrap step) + * + */ + +#define SCROLL_MOVE 0x001 +#define SCROLL_PAN_MOVE 0x002 +#define SCROLL_WRAP_MOVE 0x003 +#define SCROLL_REDRAW 0x004 +#define SCROLL_PAN_REDRAW 0x005 + +static inline u_short fb_scrollmode(struct fbcon_display *fb) +{ +#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION + return fb->scrollmode; +#else + /* hardcoded to SCROLL_REDRAW if acceleration was disabled. */ + return SCROLL_REDRAW; +#endif +} + + #ifdef CONFIG_FB_TILEBLITTING extern void fbcon_set_tileops(struct vc_data *vc, struct fb_info *info); #endif diff --git a/drivers/video/fbdev/core/fbcon_ccw.c b/drivers/video/fbdev/core/fbcon_ccw.c index ffa78936eaab0fd59e8d114e14d3b73694b68105..2789ace7963427fe80b82c19eb706daa21e316d9 100644 --- a/drivers/video/fbdev/core/fbcon_ccw.c +++ b/drivers/video/fbdev/core/fbcon_ccw.c @@ -59,12 +59,31 @@ static void ccw_update_attr(u8 *dst, u8 *src, int attribute, } } + +static void ccw_bmove(struct vc_data *vc, struct fb_info *info, int sy, + int sx, int dy, int dx, int height, int width) +{ + struct fbcon_ops *ops = info->fbcon_par; + struct fb_copyarea area; + u32 vyres = GETVYRES(ops->p, info); + + area.sx = sy * vc->vc_font.height; + area.sy = vyres - ((sx + width) * vc->vc_font.width); + area.dx = dy * vc->vc_font.height; + area.dy = vyres - ((dx + width) * vc->vc_font.width); + area.width = height * vc->vc_font.height; + area.height = width * vc->vc_font.width; + + info->fbops->fb_copyarea(info, &area); +} + static void ccw_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx, int height, int width) { + struct fbcon_ops *ops = info->fbcon_par; struct fb_fillrect region; int bgshift = (vc->vc_hi_font_mask) ? 
13 : 12; - u32 vyres = info->var.yres; + u32 vyres = GETVYRES(ops->p, info); region.color = attr_bgcol_ec(bgshift,vc,info); region.dx = sy * vc->vc_font.height; @@ -121,7 +140,7 @@ static void ccw_putcs(struct vc_data *vc, struct fb_info *info, u32 cnt, pitch, size; u32 attribute = get_attribute(info, scr_readw(s)); u8 *dst, *buf = NULL; - u32 vyres = info->var.yres; + u32 vyres = GETVYRES(ops->p, info); if (!ops->fontbuffer) return; @@ -210,7 +229,7 @@ static void ccw_cursor(struct vc_data *vc, struct fb_info *info, int mode, int attribute, use_sw = vc->vc_cursor_type & CUR_SW; int err = 1, dx, dy; char *src; - u32 vyres = info->var.yres; + u32 vyres = GETVYRES(ops->p, info); if (!ops->fontbuffer) return; @@ -368,7 +387,7 @@ static int ccw_update_start(struct fb_info *info) { struct fbcon_ops *ops = info->fbcon_par; u32 yoffset; - u32 vyres = info->var.yres; + u32 vyres = GETVYRES(ops->p, info); int err; yoffset = (vyres - info->var.yres) - ops->var.xoffset; @@ -383,6 +402,7 @@ static int ccw_update_start(struct fb_info *info) void fbcon_rotate_ccw(struct fbcon_ops *ops) { + ops->bmove = ccw_bmove; ops->clear = ccw_clear; ops->putcs = ccw_putcs; ops->clear_margins = ccw_clear_margins; diff --git a/drivers/video/fbdev/core/fbcon_cw.c b/drivers/video/fbdev/core/fbcon_cw.c index 92e5b7fb51ee2b27737256cd9d285b0dbbd4bc31..86a254c1b2b7b6bd039fde6cbc588f009b7ed338 100644 --- a/drivers/video/fbdev/core/fbcon_cw.c +++ b/drivers/video/fbdev/core/fbcon_cw.c @@ -44,12 +44,31 @@ static void cw_update_attr(u8 *dst, u8 *src, int attribute, } } + +static void cw_bmove(struct vc_data *vc, struct fb_info *info, int sy, + int sx, int dy, int dx, int height, int width) +{ + struct fbcon_ops *ops = info->fbcon_par; + struct fb_copyarea area; + u32 vxres = GETVXRES(ops->p, info); + + area.sx = vxres - ((sy + height) * vc->vc_font.height); + area.sy = sx * vc->vc_font.width; + area.dx = vxres - ((dy + height) * vc->vc_font.height); + area.dy = dx * vc->vc_font.width; + area.width = height * vc->vc_font.height; + area.height = width * vc->vc_font.width; + + info->fbops->fb_copyarea(info, &area); +} + static void cw_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx, int height, int width) { + struct fbcon_ops *ops = info->fbcon_par; struct fb_fillrect region; int bgshift = (vc->vc_hi_font_mask) ? 
13 : 12; - u32 vxres = info->var.xres; + u32 vxres = GETVXRES(ops->p, info); region.color = attr_bgcol_ec(bgshift,vc,info); region.dx = vxres - ((sy + height) * vc->vc_font.height); @@ -106,7 +125,7 @@ static void cw_putcs(struct vc_data *vc, struct fb_info *info, u32 cnt, pitch, size; u32 attribute = get_attribute(info, scr_readw(s)); u8 *dst, *buf = NULL; - u32 vxres = info->var.xres; + u32 vxres = GETVXRES(ops->p, info); if (!ops->fontbuffer) return; @@ -193,7 +212,7 @@ static void cw_cursor(struct vc_data *vc, struct fb_info *info, int mode, int attribute, use_sw = vc->vc_cursor_type & CUR_SW; int err = 1, dx, dy; char *src; - u32 vxres = info->var.xres; + u32 vxres = GETVXRES(ops->p, info); if (!ops->fontbuffer) return; @@ -350,7 +369,7 @@ static void cw_cursor(struct vc_data *vc, struct fb_info *info, int mode, static int cw_update_start(struct fb_info *info) { struct fbcon_ops *ops = info->fbcon_par; - u32 vxres = info->var.xres; + u32 vxres = GETVXRES(ops->p, info); u32 xoffset; int err; @@ -366,6 +385,7 @@ static int cw_update_start(struct fb_info *info) void fbcon_rotate_cw(struct fbcon_ops *ops) { + ops->bmove = cw_bmove; ops->clear = cw_clear; ops->putcs = cw_putcs; ops->clear_margins = cw_clear_margins; diff --git a/drivers/video/fbdev/core/fbcon_rotate.h b/drivers/video/fbdev/core/fbcon_rotate.h index b528b2e54283a50086db8ddaff5902dd8a3cec3d..01cbe303b8a29592d3236add1f8b46db85d2b74f 100644 --- a/drivers/video/fbdev/core/fbcon_rotate.h +++ b/drivers/video/fbdev/core/fbcon_rotate.h @@ -11,6 +11,15 @@ #ifndef _FBCON_ROTATE_H #define _FBCON_ROTATE_H +#define GETVYRES(s,i) ({ \ + (fb_scrollmode(s) == SCROLL_REDRAW || fb_scrollmode(s) == SCROLL_MOVE) ? \ + (i)->var.yres : (i)->var.yres_virtual; }) + +#define GETVXRES(s,i) ({ \ + (fb_scrollmode(s) == SCROLL_REDRAW || fb_scrollmode(s) == SCROLL_MOVE || !(i)->fix.xpanstep) ? \ + (i)->var.xres : (i)->var.xres_virtual; }) + + static inline int pattern_test_bit(u32 x, u32 y, u32 pitch, const char *pat) { u32 tmp = (y * pitch) + x, index = tmp / 8, bit = tmp % 8; diff --git a/drivers/video/fbdev/core/fbcon_ud.c b/drivers/video/fbdev/core/fbcon_ud.c index 09619bd8e02178570a6219578cc8bce0b8937945..23bc045769d088e8f9690c03dc8228d71635daa0 100644 --- a/drivers/video/fbdev/core/fbcon_ud.c +++ b/drivers/video/fbdev/core/fbcon_ud.c @@ -44,13 +44,33 @@ static void ud_update_attr(u8 *dst, u8 *src, int attribute, } } + +static void ud_bmove(struct vc_data *vc, struct fb_info *info, int sy, + int sx, int dy, int dx, int height, int width) +{ + struct fbcon_ops *ops = info->fbcon_par; + struct fb_copyarea area; + u32 vyres = GETVYRES(ops->p, info); + u32 vxres = GETVXRES(ops->p, info); + + area.sy = vyres - ((sy + height) * vc->vc_font.height); + area.sx = vxres - ((sx + width) * vc->vc_font.width); + area.dy = vyres - ((dy + height) * vc->vc_font.height); + area.dx = vxres - ((dx + width) * vc->vc_font.width); + area.height = height * vc->vc_font.height; + area.width = width * vc->vc_font.width; + + info->fbops->fb_copyarea(info, &area); +} + static void ud_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx, int height, int width) { + struct fbcon_ops *ops = info->fbcon_par; struct fb_fillrect region; int bgshift = (vc->vc_hi_font_mask) ? 
13 : 12; - u32 vyres = info->var.yres; - u32 vxres = info->var.xres; + u32 vyres = GETVYRES(ops->p, info); + u32 vxres = GETVXRES(ops->p, info); region.color = attr_bgcol_ec(bgshift,vc,info); region.dy = vyres - ((sy + height) * vc->vc_font.height); @@ -142,8 +162,8 @@ static void ud_putcs(struct vc_data *vc, struct fb_info *info, u32 mod = vc->vc_font.width % 8, cnt, pitch, size; u32 attribute = get_attribute(info, scr_readw(s)); u8 *dst, *buf = NULL; - u32 vyres = info->var.yres; - u32 vxres = info->var.xres; + u32 vyres = GETVYRES(ops->p, info); + u32 vxres = GETVXRES(ops->p, info); if (!ops->fontbuffer) return; @@ -239,8 +259,8 @@ static void ud_cursor(struct vc_data *vc, struct fb_info *info, int mode, int attribute, use_sw = vc->vc_cursor_type & CUR_SW; int err = 1, dx, dy; char *src; - u32 vyres = info->var.yres; - u32 vxres = info->var.xres; + u32 vyres = GETVYRES(ops->p, info); + u32 vxres = GETVXRES(ops->p, info); if (!ops->fontbuffer) return; @@ -390,8 +410,8 @@ static int ud_update_start(struct fb_info *info) { struct fbcon_ops *ops = info->fbcon_par; int xoffset, yoffset; - u32 vyres = info->var.yres; - u32 vxres = info->var.xres; + u32 vyres = GETVYRES(ops->p, info); + u32 vxres = GETVXRES(ops->p, info); int err; xoffset = vxres - info->var.xres - ops->var.xoffset; @@ -409,6 +429,7 @@ static int ud_update_start(struct fb_info *info) void fbcon_rotate_ud(struct fbcon_ops *ops) { + ops->bmove = ud_bmove; ops->clear = ud_clear; ops->putcs = ud_putcs; ops->clear_margins = ud_clear_margins; diff --git a/drivers/video/fbdev/core/fbmem.c b/drivers/video/fbdev/core/fbmem.c index 0fa7ede94fa61a642c50f1d263f7c7bae5646218..13083ad8d7515ecdf3829b5775037de61c393a18 100644 --- a/drivers/video/fbdev/core/fbmem.c +++ b/drivers/video/fbdev/core/fbmem.c @@ -1160,6 +1160,8 @@ static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, ret = fbcon_set_con2fb_map_ioctl(argp); break; case FBIOBLANK: + if (arg > FB_BLANK_POWERDOWN) + return -EINVAL; console_lock(); lock_fb_info(info); ret = fb_blank(info, arg); diff --git a/drivers/video/fbdev/core/tileblit.c b/drivers/video/fbdev/core/tileblit.c index 72af95053bcb5316192e46be036a6abdadf11375..2768eff247ba4623dd0beaa8e2d96926c472f519 100644 --- a/drivers/video/fbdev/core/tileblit.c +++ b/drivers/video/fbdev/core/tileblit.c @@ -16,6 +16,21 @@ #include #include "fbcon.h" +static void tile_bmove(struct vc_data *vc, struct fb_info *info, int sy, + int sx, int dy, int dx, int height, int width) +{ + struct fb_tilearea area; + + area.sx = sx; + area.sy = sy; + area.dx = dx; + area.dy = dy; + area.height = height; + area.width = width; + + info->tileops->fb_tilecopy(info, &area); +} + static void tile_clear(struct vc_data *vc, struct fb_info *info, int sy, int sx, int height, int width) { @@ -118,6 +133,7 @@ void fbcon_set_tileops(struct vc_data *vc, struct fb_info *info) struct fb_tilemap map; struct fbcon_ops *ops = info->fbcon_par; + ops->bmove = tile_bmove; ops->clear = tile_clear; ops->putcs = tile_putcs; ops->clear_margins = tile_clear_margins; diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 23999df52739349783f5b1d4b1927e41bd3b16b8..c8e0ea27caf1d9816375066c3b9308f8ee73d8fc 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -287,8 +287,6 @@ struct hvfb_par { static uint screen_width = HVFB_WIDTH; static uint screen_height = HVFB_HEIGHT; -static uint screen_width_max = HVFB_WIDTH; -static uint screen_height_max = HVFB_HEIGHT; static uint screen_depth; static uint 
screen_fb_size; static uint dio_fb_size; /* FB size for deferred IO */ @@ -582,7 +580,6 @@ static int synthvid_get_supported_resolution(struct hv_device *hdev) int ret = 0; unsigned long t; u8 index; - int i; memset(msg, 0, sizeof(struct synthvid_msg)); msg->vid_hdr.type = SYNTHVID_RESOLUTION_REQUEST; @@ -613,13 +610,6 @@ static int synthvid_get_supported_resolution(struct hv_device *hdev) goto out; } - for (i = 0; i < msg->resolution_resp.resolution_count; i++) { - screen_width_max = max_t(unsigned int, screen_width_max, - msg->resolution_resp.supported_resolution[i].width); - screen_height_max = max_t(unsigned int, screen_height_max, - msg->resolution_resp.supported_resolution[i].height); - } - screen_width = msg->resolution_resp.supported_resolution[index].width; screen_height = @@ -941,7 +931,7 @@ static void hvfb_get_option(struct fb_info *info) if (x < HVFB_WIDTH_MIN || y < HVFB_HEIGHT_MIN || (synthvid_ver_ge(par->synthvid_version, SYNTHVID_VERSION_WIN10) && - (x > screen_width_max || y > screen_height_max)) || + (x * y * screen_depth / 8 > screen_fb_size)) || (par->synthvid_version == SYNTHVID_VERSION_WIN8 && x * y * screen_depth / 8 > SYNTHVID_FB_SIZE_WIN8) || (par->synthvid_version == SYNTHVID_VERSION_WIN7 && @@ -1194,8 +1184,8 @@ static int hvfb_probe(struct hv_device *hdev, } hvfb_get_option(info); - pr_info("Screen resolution: %dx%d, Color depth: %d\n", - screen_width, screen_height, screen_depth); + pr_info("Screen resolution: %dx%d, Color depth: %d, Frame buffer size: %d\n", + screen_width, screen_height, screen_depth, screen_fb_size); ret = hvfb_getmem(hdev, info); if (ret) { diff --git a/drivers/video/fbdev/skeletonfb.c b/drivers/video/fbdev/skeletonfb.c index 0fe922f726e98a14c71ae4a78708fd61cdc93da2..bcacfb6934fa9a02c90a8bdca928b1544a102f13 100644 --- a/drivers/video/fbdev/skeletonfb.c +++ b/drivers/video/fbdev/skeletonfb.c @@ -505,15 +505,15 @@ void xxxfb_fillrect(struct fb_info *p, const struct fb_fillrect *region) } /** - * xxxfb_copyarea - OBSOLETE function. + * xxxfb_copyarea - REQUIRED function. Can use generic routines if + * non-accelerated hardware and packed pixel based. * Copies one area of the screen to another area. - * Will be deleted in a future version * * @info: frame buffer structure that represents a single frame buffer * @area: Structure providing the data to copy the framebuffer contents * from one region to another. * - * This drawing operation copied a rectangular area from one area of the + * This drawing operation copies a rectangular area from one area of the * screen to another area. */ void xxxfb_copyarea(struct fb_info *p, const struct fb_copyarea *area) @@ -645,9 +645,9 @@ static const struct fb_ops xxxfb_ops = { .fb_setcolreg = xxxfb_setcolreg, .fb_blank = xxxfb_blank, .fb_pan_display = xxxfb_pan_display, - .fb_fillrect = xxxfb_fillrect, /* Needed !!! */ - .fb_copyarea = xxxfb_copyarea, /* Obsolete */ - .fb_imageblit = xxxfb_imageblit, /* Needed !!! */ + .fb_fillrect = xxxfb_fillrect, /* Needed !!! */ + .fb_copyarea = xxxfb_copyarea, /* Needed !!! */ + .fb_imageblit = xxxfb_imageblit, /* Needed !!! */ .fb_cursor = xxxfb_cursor, /* Optional !!!
*/ .fb_sync = xxxfb_sync, .fb_ioctl = xxxfb_ioctl, diff --git a/drivers/virt/acrn/ioreq.c b/drivers/virt/acrn/ioreq.c index 80b2e3f0e2764fbf495db8a5576b5e00f8cccd6b..5ff1c53740c00ba31b49eb1057662d907f6b7c98 100644 --- a/drivers/virt/acrn/ioreq.c +++ b/drivers/virt/acrn/ioreq.c @@ -246,8 +246,7 @@ void acrn_ioreq_request_clear(struct acrn_vm *vm) spin_lock_bh(&vm->ioreq_clients_lock); client = vm->default_client; if (client) { - vcpu = find_next_bit(client->ioreqs_map, - ACRN_IO_REQUEST_MAX, 0); + vcpu = find_first_bit(client->ioreqs_map, ACRN_IO_REQUEST_MAX); while (vcpu < ACRN_IO_REQUEST_MAX) { acrn_ioreq_complete_request(client, vcpu, NULL); vcpu = find_next_bit(client->ioreqs_map, diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c index 2c890f4f2cbcb609616b077d4b2fecc7b579ff19..72d4e3f193af1ba5f7ffd704cd353ac1abddfd4b 100644 --- a/drivers/xen/pci.c +++ b/drivers/xen/pci.c @@ -264,7 +264,7 @@ struct xen_device_domain_owner { }; static DEFINE_SPINLOCK(dev_domain_list_spinlock); -static struct list_head dev_domain_list = LIST_HEAD_INIT(dev_domain_list); +static LIST_HEAD(dev_domain_list); static struct xen_device_domain_owner *find_device(struct pci_dev *dev) { diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c index 1c9ae08225d8d818ab6550adc4346032bb5fa893..f916bf60b312faf7f85d36fab1b2bcaa48d5d883 100644 --- a/drivers/zorro/proc.c +++ b/drivers/zorro/proc.c @@ -30,7 +30,7 @@ proc_bus_zorro_lseek(struct file *file, loff_t off, int whence) static ssize_t proc_bus_zorro_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - struct zorro_dev *z = PDE_DATA(file_inode(file)); + struct zorro_dev *z = pde_data(file_inode(file)); struct ConfigDev cd; loff_t pos = *ppos; diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 6aab046c98e291ebf701a26c46614baa4a1a427f..79df61fe0e5964667f604eadc37b51b1139fd222 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -96,12 +96,8 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) dentry, dentry, from_kuid(&init_user_ns, uid), any); ret = NULL; - - if (d_inode(dentry)) - ret = v9fs_fid_find_inode(d_inode(dentry), uid); - /* we'll recheck under lock if there's anything to look in */ - if (!ret && dentry->d_fsdata) { + if (dentry->d_fsdata) { struct hlist_head *h = (struct hlist_head *)&dentry->d_fsdata; spin_lock(&dentry->d_lock); @@ -113,6 +109,9 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) } } spin_unlock(&dentry->d_lock); + } else { + if (dentry->d_inode) + ret = v9fs_fid_find_inode(dentry->d_inode, uid); } return ret; diff --git a/fs/Kconfig b/fs/Kconfig index 7a2b11c0b8036d4163274667b5d569a24facd089..6c7dc1387beb0f2ac34be3e56dd5579130c8ec13 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -369,8 +369,8 @@ source "fs/ksmbd/Kconfig" config SMBFS_COMMON tristate - default y if CIFS=y - default m if CIFS=m + default y if CIFS=y || SMB_SERVER=y + default m if CIFS=m || SMB_SERVER=m source "fs/coda/Kconfig" source "fs/afs/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index 84c5e4cdfee5a694da7fc96b6a08109d653e4cb5..208a74e0b00e12edf164468e7a63cda611f12a8b 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -6,6 +6,8 @@ # Rewritten to use lists instead of if-statements. 
# +obj-$(CONFIG_SYSCTL) += sysctls.o + obj-y := open.o read_write.o file_table.o super.o \ char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o dcache.o inode.o \ @@ -94,7 +96,7 @@ obj-$(CONFIG_EXPORTFS) += exportfs/ obj-$(CONFIG_NFSD) += nfsd/ obj-$(CONFIG_LOCKD) += lockd/ obj-$(CONFIG_NLS) += nls/ -obj-$(CONFIG_UNICODE) += unicode/ +obj-y += unicode/ obj-$(CONFIG_SYSV_FS) += sysv/ obj-$(CONFIG_SMBFS_COMMON) += smbfs_common/ obj-$(CONFIG_CIFS) += cifs/ diff --git a/fs/afs/proc.c b/fs/afs/proc.c index 065a28bfa3f18384881cce5cf47d09d1b6e6fd07..e1b863449296c32a93408d72ba1c88a0c7ddf944 100644 --- a/fs/afs/proc.c +++ b/fs/afs/proc.c @@ -227,7 +227,7 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v) static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) __acquires(cell->proc_lock) { - struct afs_cell *cell = PDE_DATA(file_inode(m->file)); + struct afs_cell *cell = pde_data(file_inode(m->file)); rcu_read_lock(); return seq_hlist_start_head_rcu(&cell->proc_volumes, *_pos); @@ -236,7 +236,7 @@ static void *afs_proc_cell_volumes_start(struct seq_file *m, loff_t *_pos) static void *afs_proc_cell_volumes_next(struct seq_file *m, void *v, loff_t *_pos) { - struct afs_cell *cell = PDE_DATA(file_inode(m->file)); + struct afs_cell *cell = pde_data(file_inode(m->file)); return seq_hlist_next_rcu(v, &cell->proc_volumes, _pos); } @@ -322,7 +322,7 @@ static void *afs_proc_cell_vlservers_start(struct seq_file *m, loff_t *_pos) { struct afs_vl_seq_net_private *priv = m->private; struct afs_vlserver_list *vllist; - struct afs_cell *cell = PDE_DATA(file_inode(m->file)); + struct afs_cell *cell = pde_data(file_inode(m->file)); loff_t pos = *_pos; rcu_read_lock(); diff --git a/fs/aio.c b/fs/aio.c index f6f1cbffef9e8c8cc302c2b2c5b8d55a6172fe3b..4ceba13a7db0f60510174c8b8d46f461ab3034ea 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -220,9 +220,35 @@ struct aio_kiocb { /*------ sysctl variables----*/ static DEFINE_SPINLOCK(aio_nr_lock); -unsigned long aio_nr; /* current system wide number of aio requests */ -unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ +static unsigned long aio_nr; /* current system wide number of aio requests */ +static unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ /*----end sysctl variables---*/ +#ifdef CONFIG_SYSCTL +static struct ctl_table aio_sysctls[] = { + { + .procname = "aio-nr", + .data = &aio_nr, + .maxlen = sizeof(aio_nr), + .mode = 0444, + .proc_handler = proc_doulongvec_minmax, + }, + { + .procname = "aio-max-nr", + .data = &aio_max_nr, + .maxlen = sizeof(aio_max_nr), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, + {} +}; + +static void __init aio_sysctl_init(void) +{ + register_sysctl_init("fs", aio_sysctls); +} +#else +#define aio_sysctl_init() do { } while (0) +#endif static struct kmem_cache *kiocb_cachep; static struct kmem_cache *kioctx_cachep; @@ -275,6 +301,7 @@ static int __init aio_setup(void) kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC); kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC); + aio_sysctl_init(); return 0; } __initcall(aio_setup); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 605017eb9349e5e9de8b60ff38118eb0428c11dc..9e11e6f13e83affc3bedb4850a362e59a303a676 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1117,7 +1117,7 @@ out_free_interp: * without MAP_FIXED nor MAP_FIXED_NOREPLACE). 
*/ alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum); - if (alignment > ELF_MIN_ALIGN) { + if (interpreter || alignment > ELF_MIN_ALIGN) { load_bias = ELF_ET_DYN_BASE; if (current->flags & PF_RANDOMIZE) load_bias += arch_mmap_rnd(); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 1db24e6d6d906d462e29fc8c50523ae71e685780..8202ad6aa131740ec44474f0f82cb451f43eaebc 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -124,7 +124,16 @@ void btrfs_put_block_group(struct btrfs_block_group *cache) { if (refcount_dec_and_test(&cache->refs)) { WARN_ON(cache->pinned > 0); - WARN_ON(cache->reserved > 0); + /* + * If there was a failure to cleanup a log tree, very likely due + * to an IO failure on a writeback attempt of one or more of its + * extent buffers, we could not do proper (and cheap) unaccounting + * of their reserved space, so don't warn on reserved > 0 in that + * case. + */ + if (!(cache->flags & BTRFS_BLOCK_GROUP_METADATA) || + !BTRFS_FS_LOG_CLEANUP_ERROR(cache->fs_info)) + WARN_ON(cache->reserved > 0); /* * A block_group shouldn't be on the discard_list anymore. @@ -2544,6 +2553,19 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache, int ret; bool dirty_bg_running; + /* + * This can only happen when we are doing read-only scrub on read-only + * mount. + * In that case we should not start a new transaction on read-only fs. + * Thus here we skip all chunk allocations. + */ + if (sb_rdonly(fs_info->sb)) { + mutex_lock(&fs_info->ro_block_group_mutex); + ret = inc_block_group_ro(cache, 0); + mutex_unlock(&fs_info->ro_block_group_mutex); + return ret; + } + do { trans = btrfs_join_transaction(root); if (IS_ERR(trans)) @@ -3974,9 +3996,22 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) * important and indicates a real bug if this happens. */ if (WARN_ON(space_info->bytes_pinned > 0 || - space_info->bytes_reserved > 0 || space_info->bytes_may_use > 0)) btrfs_dump_space_info(info, space_info, 0, 0); + + /* + * If there was a failure to cleanup a log tree, very likely due + * to an IO failure on a writeback attempt of one or more of its + * extent buffers, we could not do proper (and cheap) unaccounting + * of their reserved space, so don't warn on bytes_reserved > 0 in + * that case. + */ + if (!(space_info->flags & BTRFS_BLOCK_GROUP_METADATA) || + !BTRFS_FS_LOG_CLEANUP_ERROR(info)) { + if (WARN_ON(space_info->bytes_reserved > 0)) + btrfs_dump_space_info(info, space_info, 0, 0); + } + WARN_ON(space_info->reclaim_size > 0); list_del(&space_info->list); btrfs_sysfs_remove_space_info(space_info); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index b4a9b1c58d2286efb3f2e159dc16f31df2cf2b81..8992e0096163e2c4bc6b9c86b180ef0a47232691 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -145,6 +145,9 @@ enum { BTRFS_FS_STATE_DUMMY_FS_INFO, BTRFS_FS_STATE_NO_CSUMS, + + /* Indicates there was an error cleaning up a log tree. 
*/ + BTRFS_FS_STATE_LOG_CLEANUP_ERROR, }; #define BTRFS_BACKREF_REV_MAX 256 @@ -3593,6 +3596,9 @@ do { \ #define BTRFS_FS_ERROR(fs_info) (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \ &(fs_info)->fs_state))) +#define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info) \ + (unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \ + &(fs_info)->fs_state))) __printf(5, 6) __cold diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d6d48ecf823c90cfeab6d3d8d044cd87f4d08357..409bad3928db394f9782f8236f4d628792cf82e5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include "misc.h" #include "extent_io.h" @@ -3578,15 +3577,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, goto out; } - if (!PageUptodate(page)) { - if (cleancache_get_page(page) == 0) { - BUG_ON(blocksize != PAGE_SIZE); - unlock_extent(tree, start, end); - unlock_page(page); - goto out; - } - } - if (page->index == last_byte >> PAGE_SHIFT) { size_t zero_offset = offset_in_page(last_byte); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a5bd6926f7ffe9741eeddcbd5ce246019728423e..927771d1853f791862180999fe23b7e97def5364 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -805,10 +805,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, goto fail; } - spin_lock(&fs_info->trans_lock); - list_add(&pending_snapshot->list, - &trans->transaction->pending_snapshots); - spin_unlock(&fs_info->trans_lock); + trans->pending_snapshot = pending_snapshot; ret = btrfs_commit_transaction(trans); if (ret) @@ -1213,6 +1210,39 @@ static int defrag_collect_targets(struct btrfs_inode *inode, if (em->generation < newer_than) goto next; + /* This em is under writeback, no need to defrag */ + if (em->generation == (u64)-1) + goto next; + + /* + * Our start offset might be in the middle of an existing extent + * map, so take that into account. + */ + range_len = em->len - (cur - em->start); + /* + * If this range of the extent map is already flagged for delalloc, + * skip it, because: + * + * 1) We could deadlock later, when trying to reserve space for + * delalloc, because in case we can't immediately reserve space + * the flusher can start delalloc and wait for the respective + * ordered extents to complete. The deadlock would happen + * because we do the space reservation while holding the range + * locked, and starting writeback, or finishing an ordered + * extent, requires locking the range; + * + * 2) If there's delalloc there, it means there's dirty pages for + * which writeback has not started yet (we clean the delalloc + * flag when starting writeback and after creating an ordered + * extent). If we mark pages in an adjacent range for defrag, + * then we will have a larger contiguous range for delalloc, + * very likely resulting in a larger extent after writeback is + * triggered (except in a case of free space fragmentation). + */ + if (test_range_bit(&inode->io_tree, cur, cur + range_len - 1, + EXTENT_DELALLOC, 0, NULL)) + goto next; + /* * For do_compress case, we want to compress all valid file * extents, thus no @extent_thresh or mergeable check. 
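Before the next hunk switches the threshold check over to range_len, the arithmetic described in the comment above is worth spelling out. A minimal user-space sketch, assuming stand-in types (em_sketch and both helper names are illustrative, not the kernel's struct extent_map API):

#include <stdint.h>
#include <stdbool.h>

/* Stand-in for the kernel's struct extent_map; only the fields needed here. */
struct em_sketch {
	uint64_t start;	/* file offset where the extent map begins */
	uint64_t len;	/* length of the extent map in bytes */
};

/*
 * Bytes of @em lying at or after @cur. Mirrors the patch's
 * "range_len = em->len - (cur - em->start)", so a defrag pass that resumes
 * in the middle of an extent map only measures the remaining part.
 */
static uint64_t usable_range_len(const struct em_sketch *em, uint64_t cur)
{
	return em->len - (cur - em->start);
}

/* The reworked "too large" test: judge the remaining length, not em->len. */
static bool skip_large_extent(const struct em_sketch *em, uint64_t cur,
			      uint32_t extent_thresh)
{
	return usable_range_len(em, cur) >= extent_thresh;
}

For an extent map with start 0 and len 1 MiB, a pass resuming at cur = 896 KiB measures only the trailing 128 KiB against extent_thresh, instead of the full 1 MiB that the old em->len comparison used.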
@@ -1221,7 +1251,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode, goto add; /* Skip too large extent */ - if (em->len >= extent_thresh) + if (range_len >= extent_thresh) goto next; next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em, @@ -1442,9 +1472,11 @@ static int defrag_one_cluster(struct btrfs_inode *inode, list_for_each_entry(entry, &target_list, list) { u32 range_len = entry->len; - /* Reached the limit */ - if (max_sectors && max_sectors == *sectors_defragged) + /* Reached or beyond the limit */ + if (max_sectors && *sectors_defragged >= max_sectors) { + ret = 1; break; + } if (max_sectors) range_len = min_t(u32, range_len, @@ -1465,7 +1497,8 @@ static int defrag_one_cluster(struct btrfs_inode *inode, extent_thresh, newer_than, do_compress); if (ret < 0) break; - *sectors_defragged += range_len; + *sectors_defragged += range_len >> + inode->root->fs_info->sectorsize_bits; } out: list_for_each_entry_safe(entry, tmp, &target_list, list) { @@ -1484,6 +1517,12 @@ out: * @newer_than: minimum transid to defrag * @max_to_defrag: max number of sectors to be defragged, if 0, the whole inode * will be defragged. + * + * Return <0 for error. + * Return >=0 for the number of sectors defragged, and range->start will be updated + * to indicate the file offset where next defrag should be started at. + * (Mostly for autodefrag, which sets @max_to_defrag thus we may exit early without + * defragging all the range). */ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, struct btrfs_ioctl_defrag_range_args *range, @@ -1499,6 +1538,7 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, int compress_type = BTRFS_COMPRESS_ZLIB; int ret = 0; u32 extent_thresh = range->extent_thresh; + pgoff_t start_index; if (isize == 0) return 0; @@ -1518,12 +1558,16 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, if (range->start + range->len > range->start) { /* Got a specific range */ - last_byte = min(isize, range->start + range->len) - 1; + last_byte = min(isize, range->start + range->len); } else { /* Defrag until file end */ - last_byte = isize - 1; + last_byte = isize; } + /* Align the range */ + cur = round_down(range->start, fs_info->sectorsize); + last_byte = round_up(last_byte, fs_info->sectorsize) - 1; + /* * If we were not given a ra, allocate a readahead context. As * readahead is just an optimization, defrag will work without it so @@ -1536,16 +1580,26 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, file_ra_state_init(ra, inode->i_mapping); } - /* Align the range */ - cur = round_down(range->start, fs_info->sectorsize); - last_byte = round_up(last_byte, fs_info->sectorsize) - 1; + /* + * Make writeback start from the beginning of the range, so that the + * defrag range can be written sequentially. 
+ */ + start_index = cur >> PAGE_SHIFT; + if (start_index < inode->i_mapping->writeback_index) + inode->i_mapping->writeback_index = start_index; while (cur < last_byte) { + const unsigned long prev_sectors_defragged = sectors_defragged; u64 cluster_end; /* The cluster size 256K should always be page aligned */ BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE)); + if (btrfs_defrag_cancelled(fs_info)) { + ret = -EAGAIN; + break; + } + /* We want the cluster end at page boundary when possible */ cluster_end = (((cur >> PAGE_SHIFT) + (SZ_256K >> PAGE_SHIFT)) << PAGE_SHIFT) - 1; @@ -1567,14 +1621,28 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra, cluster_end + 1 - cur, extent_thresh, newer_than, do_compress, §ors_defragged, max_to_defrag); + + if (sectors_defragged > prev_sectors_defragged) + balance_dirty_pages_ratelimited(inode->i_mapping); + btrfs_inode_unlock(inode, 0); if (ret < 0) break; cur = cluster_end + 1; + if (ret > 0) { + ret = 0; + break; + } + cond_resched(); } if (ra_allocated) kfree(ra); + /* + * Update range.start for autodefrag, this will indicate where to start + * in next run. + */ + range->start = cur; if (sectors_defragged) { /* * We have defragged some sectors, for compression case they @@ -3086,10 +3154,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, btrfs_inode_lock(inode, 0); err = btrfs_delete_subvolume(dir, dentry); btrfs_inode_unlock(inode, 0); - if (!err) { - fsnotify_rmdir(dir, dentry); - d_delete(dentry); - } + if (!err) + d_delete_notify(dir, dentry); out_dput: dput(dentry); @@ -3290,7 +3356,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) struct block_device *bdev = NULL; fmode_t mode; int ret; - bool cancel; + bool cancel = false; if (!capable(CAP_SYS_ADMIN)) return -EPERM; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 8928275823a1708779fe5edad275971f9de25c4e..f12dc687350c71cd103e7e0156dcb2dd903c1162 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1185,9 +1185,24 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) struct btrfs_trans_handle *trans = NULL; int ret = 0; + /* + * We need to have subvol_sem write locked, to prevent races between + * concurrent tasks trying to disable quotas, because we will unlock + * and relock qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes. + */ + lockdep_assert_held_write(&fs_info->subvol_sem); + mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) goto out; + + /* + * Request qgroup rescan worker to complete and wait for it. This wait + * must be done before transaction start for quota disable since it may + * deadlock with transaction by the qgroup rescan worker. 
+ */ + clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); + btrfs_qgroup_wait_for_completion(fs_info, false); mutex_unlock(&fs_info->qgroup_ioctl_lock); /* @@ -1205,14 +1220,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) if (IS_ERR(trans)) { ret = PTR_ERR(trans); trans = NULL; + set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); goto out; } if (!fs_info->quota_root) goto out; - clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); - btrfs_qgroup_wait_for_completion(fs_info, false); spin_lock(&fs_info->qgroup_lock); quota_root = fs_info->quota_root; fs_info->quota_root = NULL; @@ -3383,6 +3397,9 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, btrfs_warn(fs_info, "qgroup rescan init failed, qgroup is not enabled"); ret = -EINVAL; + } else if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { + /* Quota disable is in progress */ + ret = -EBUSY; } if (ret) { diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index d8ccb62aa7d27699cde6e19703fedc50193798aa..201eb2628aea117a6b5c14d02973f0bf143fc4af 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -4999,6 +4999,10 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len) lock_page(page); if (!PageUptodate(page)) { unlock_page(page); + btrfs_err(fs_info, + "send: IO error at offset %llu for inode %llu root %llu", + page_offset(page), sctx->cur_ino, + sctx->send_root->root_key.objectid); put_page(page); ret = -EIO; break; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0ec09fe01be607c0adb443f54b46518400f33d73..4d947ba32da9d5930def2bbebca19c960f464313 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -1374,7 +1373,6 @@ static int btrfs_fill_super(struct super_block *sb, goto fail_close; } - cleancache_init_fs(sb); sb->s_flags |= SB_ACTIVE; return 0; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 03de89b45f279e0b1e960af1013fe50ac88a6bf0..c3cfdfd8de9b2d2ec040343f9f1dadca6cfe624e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1981,16 +1981,24 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans) static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) { /* - * We use writeback_inodes_sb here because if we used + * We use try_to_writeback_inodes_sb() here because if we used * btrfs_start_delalloc_roots we would deadlock with fs freeze. * Currently are holding the fs freeze lock, if we do an async flush * we'll do btrfs_join_transaction() and deadlock because we need to * wait for the fs freeze lock. Using the direct flushing we benefit * from already being in a transaction and our join_transaction doesn't * have to re-take the fs freeze lock. + * + * Note that try_to_writeback_inodes_sb() will only trigger writeback + * if it can read lock sb->s_umount. It will always be able to lock it, + * except when the filesystem is being unmounted or being frozen, but in + * those cases sync_filesystem() is called, which results in calling + * writeback_inodes_sb() while holding a write lock on sb->s_umount. + * Note that we don't call writeback_inodes_sb() directly, because it + * will emit a warning if sb->s_umount is not locked. 
*/ if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) - writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC); + try_to_writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC); return 0; } @@ -2000,6 +2008,27 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); } +/* + * Add a pending snapshot associated with the given transaction handle to the + * respective handle. This must be called after the transaction commit started + * and while holding fs_info->trans_lock. + * This serves to guarantee a caller of btrfs_commit_transaction() that it can + * safely free the pending snapshot pointer in case btrfs_commit_transaction() + * returns an error. + */ +static void add_pending_snapshot(struct btrfs_trans_handle *trans) +{ + struct btrfs_transaction *cur_trans = trans->transaction; + + if (!trans->pending_snapshot) + return; + + lockdep_assert_held(&trans->fs_info->trans_lock); + ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START); + + list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots); +} + int btrfs_commit_transaction(struct btrfs_trans_handle *trans) { struct btrfs_fs_info *fs_info = trans->fs_info; @@ -2073,6 +2102,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) if (cur_trans->state >= TRANS_STATE_COMMIT_START) { enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED; + add_pending_snapshot(trans); + spin_unlock(&fs_info->trans_lock); refcount_inc(&cur_trans->use_count); @@ -2163,6 +2194,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) * COMMIT_DOING so make sure to wait for num_writers to == 1 again. */ spin_lock(&fs_info->trans_lock); + add_pending_snapshot(trans); cur_trans->state = TRANS_STATE_COMMIT_DOING; spin_unlock(&fs_info->trans_lock); wait_event(cur_trans->writer_wait, diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 1852ed9de7fd56bf9f0d9b41d96a29e5a3e66cac..9402d8d944846f2ab398d46125b5448e2b6dc538 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -123,6 +123,8 @@ struct btrfs_trans_handle { struct btrfs_transaction *transaction; struct btrfs_block_rsv *block_rsv; struct btrfs_block_rsv *orig_rsv; + /* Set by a task that wants to create a snapshot. 
*/ + struct btrfs_pending_snapshot *pending_snapshot; refcount_t use_count; unsigned int type; /* diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 72e1c942197df816bb07f08dc686467900ae3c56..9fd145f1c4bc265da7362a6acb3fb693ec4b817d 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -965,6 +965,7 @@ static int check_dev_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_dev_item *ditem; + const u32 item_size = btrfs_item_size(leaf, slot); if (unlikely(key->objectid != BTRFS_DEV_ITEMS_OBJECTID)) { dev_item_err(leaf, slot, @@ -972,6 +973,13 @@ static int check_dev_item(struct extent_buffer *leaf, key->objectid, BTRFS_DEV_ITEMS_OBJECTID); return -EUCLEAN; } + + if (unlikely(item_size != sizeof(*ditem))) { + dev_item_err(leaf, slot, "invalid item size: has %u expect %zu", + item_size, sizeof(*ditem)); + return -EUCLEAN; + } + ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); if (unlikely(btrfs_device_id(leaf, ditem) != key->offset)) { dev_item_err(leaf, slot, @@ -1007,6 +1015,7 @@ static int check_inode_item(struct extent_buffer *leaf, struct btrfs_inode_item *iitem; u64 super_gen = btrfs_super_generation(fs_info->super_copy); u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777); + const u32 item_size = btrfs_item_size(leaf, slot); u32 mode; int ret; u32 flags; @@ -1016,6 +1025,12 @@ static int check_inode_item(struct extent_buffer *leaf, if (unlikely(ret < 0)) return ret; + if (unlikely(item_size != sizeof(*iitem))) { + generic_err(leaf, slot, "invalid item size: has %u expect %zu", + item_size, sizeof(*iitem)); + return -EUCLEAN; + } + iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item); /* Here we use super block generation + 1 to handle log tree */ diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c1ddbe800897578c587e5b376a49bcb6060b2e66..3ee014c06b82a544df86c9df228bbcc72d5f3b56 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3414,6 +3414,29 @@ static void free_log_tree(struct btrfs_trans_handle *trans, if (log->node) { ret = walk_log_tree(trans, log, &wc); if (ret) { + /* + * We weren't able to traverse the entire log tree, the + * typical scenario is getting an -EIO when reading an + * extent buffer of the tree, due to a previous writeback + * failure of it. + */ + set_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, + &log->fs_info->fs_state); + + /* + * Some extent buffers of the log tree may still be dirty + * and not yet written back to storage, because we may + * have updates to a log tree without syncing a log tree, + * such as during rename and link operations. So flush + * them out and wait for their writeback to complete, so + * that we properly cleanup their state and pages. 
+ */ + btrfs_write_marked_extents(log->fs_info, + &log->dirty_log_pages, + EXTENT_DIRTY | EXTENT_NEW); + btrfs_wait_tree_log_extents(log, + EXTENT_DIRTY | EXTENT_NEW); + if (trans) btrfs_abort_transaction(trans, ret); else diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c index ce4d4785003cb53de9b0f5aaf299a31c27b4c687..7077f72e6f4747c2a1f1bd6c898221d4bdaab380 100644 --- a/fs/cachefiles/cache.c +++ b/fs/cachefiles/cache.c @@ -49,11 +49,19 @@ int cachefiles_add_cache(struct cachefiles_cache *cache) goto error_unsupported; } - /* check parameters */ + /* Check features of the backing filesystem: + * - Directories must support looking up and directory creation + * - We create tmpfiles to handle invalidation + * - We use xattrs to store metadata + * - We need to be able to query the amount of space available + * - We want to be able to sync the filesystem when stopping the cache + * - We use DIO to/from pages, so the blocksize mustn't be too big. + */ ret = -EOPNOTSUPP; if (d_is_negative(root) || !d_backing_inode(root)->i_op->lookup || !d_backing_inode(root)->i_op->mkdir || + !d_backing_inode(root)->i_op->tmpfile || !(d_backing_inode(root)->i_opflags & IOP_XATTR) || !root->d_sb->s_op->statfs || !root->d_sb->s_op->sync_fs || @@ -84,9 +92,7 @@ int cachefiles_add_cache(struct cachefiles_cache *cache) goto error_unsupported; cache->bsize = stats.f_bsize; - cache->bshift = 0; - if (stats.f_bsize < PAGE_SIZE) - cache->bshift = PAGE_SHIFT - ilog2(stats.f_bsize); + cache->bshift = ilog2(stats.f_bsize); _debug("blksize %u (shift %u)", cache->bsize, cache->bshift); @@ -106,7 +112,6 @@ int cachefiles_add_cache(struct cachefiles_cache *cache) (unsigned long long) cache->fcull, (unsigned long long) cache->fstop); - stats.f_blocks >>= cache->bshift; do_div(stats.f_blocks, 100); cache->bstop = stats.f_blocks * cache->bstop_percent; cache->bcull = stats.f_blocks * cache->bcull_percent; @@ -209,7 +214,7 @@ int cachefiles_has_space(struct cachefiles_cache *cache, return ret; } - b_avail = stats.f_bavail >> cache->bshift; + b_avail = stats.f_bavail; b_writing = atomic_long_read(&cache->b_writing); if (b_avail > b_writing) b_avail -= b_writing; diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 40a792421fc14239558a5fdc583a0834b5222c51..7ac04ee2c0a0a51bfb6a8e192ec4fecda7dd72b4 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -703,6 +703,17 @@ static int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args) return -EBUSY; } + /* Make sure we have copies of the tag string */ + if (!cache->tag) { + /* + * The tag string is released by the fops->release() + * function, so we don't release it on error here + */ + cache->tag = kstrdup("CacheFiles", GFP_KERNEL); + if (!cache->tag) + return -ENOMEM; + } + return cachefiles_add_cache(cache); } diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 8dd54d9375b61824135cf1e7fed61bebce098a25..c793d33b0224ec158ef579791e856a7a2726ca74 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -86,7 +86,7 @@ struct cachefiles_cache { unsigned bcull_percent; /* when to start culling (% blocks) */ unsigned bstop_percent; /* when to stop allocating (% blocks) */ unsigned bsize; /* cache's block size */ - unsigned bshift; /* min(ilog2(PAGE_SIZE / bsize), 0) */ + unsigned bshift; /* ilog2(bsize) */ uint64_t frun; /* when to stop culling */ uint64_t fcull; /* when to start culling */ uint64_t fstop; /* when to stop allocating */ diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index 
60b1eac2ce78e6f4519673827396d23fbcf74389..753986ea1583b3139e22ef6edb94f57d9a0beec9 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -191,6 +191,64 @@ presubmission_error: return ret; } +/* + * Query the occupancy of the cache in a region, returning where the next chunk + * of data starts and how long it is. + */ +static int cachefiles_query_occupancy(struct netfs_cache_resources *cres, + loff_t start, size_t len, size_t granularity, + loff_t *_data_start, size_t *_data_len) +{ + struct cachefiles_object *object; + struct file *file; + loff_t off, off2; + + *_data_start = -1; + *_data_len = 0; + + if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ)) + return -ENOBUFS; + + object = cachefiles_cres_object(cres); + file = cachefiles_cres_file(cres); + granularity = max_t(size_t, object->volume->cache->bsize, granularity); + + _enter("%pD,%li,%llx,%zx/%llx", + file, file_inode(file)->i_ino, start, len, + i_size_read(file_inode(file))); + + off = cachefiles_inject_read_error(); + if (off == 0) + off = vfs_llseek(file, start, SEEK_DATA); + if (off == -ENXIO) + return -ENODATA; /* Beyond EOF */ + if (off < 0 && off >= (loff_t)-MAX_ERRNO) + return -ENOBUFS; /* Error. */ + if (round_up(off, granularity) >= start + len) + return -ENODATA; /* No data in range */ + + off2 = cachefiles_inject_read_error(); + if (off2 == 0) + off2 = vfs_llseek(file, off, SEEK_HOLE); + if (off2 == -ENXIO) + return -ENODATA; /* Beyond EOF */ + if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO) + return -ENOBUFS; /* Error. */ + + /* Round away partial blocks */ + off = round_up(off, granularity); + off2 = round_down(off2, granularity); + if (off2 <= off) + return -ENODATA; + + *_data_start = off; + if (off2 > start + len) + *_data_len = len; + else + *_data_len = off2 - off; + return 0; +} + /* * Handle completion of a write to the cache. 
*/ @@ -264,7 +322,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres, ki->term_func = term_func; ki->term_func_priv = term_func_priv; ki->was_async = true; - ki->b_writing = (len + (1 << cache->bshift)) >> cache->bshift; + ki->b_writing = (len + (1 << cache->bshift) - 1) >> cache->bshift; if (ki->term_func) ki->iocb.ki_complete = cachefiles_write_complete; @@ -545,6 +603,7 @@ static const struct netfs_cache_ops cachefiles_netfs_cache_ops = { .write = cachefiles_write, .prepare_read = cachefiles_prepare_read, .prepare_write = cachefiles_prepare_write, + .query_occupancy = cachefiles_query_occupancy, }; /* diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 9bd692870617c4cd108845aae0b1891f94584323..f256c8aff7bb5fa7db56210c7961ce56aaedd6d9 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -25,7 +25,9 @@ static bool __cachefiles_mark_inode_in_use(struct cachefiles_object *object, trace_cachefiles_mark_active(object, inode); can_use = true; } else { - pr_notice("cachefiles: Inode already in use: %pd\n", dentry); + trace_cachefiles_mark_failed(object, inode); + pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n", + dentry, inode->i_ino); } return can_use; @@ -101,6 +103,7 @@ retry: subdir = lookup_one_len(dirname, dir, strlen(dirname)); else subdir = ERR_PTR(ret); + trace_cachefiles_lookup(NULL, dir, subdir); if (IS_ERR(subdir)) { trace_cachefiles_vfs_error(NULL, d_backing_inode(dir), PTR_ERR(subdir), @@ -135,6 +138,7 @@ retry: cachefiles_trace_mkdir_error); goto mkdir_error; } + trace_cachefiles_mkdir(dir, subdir); if (unlikely(d_unhashed(subdir))) { cachefiles_put_directory(subdir); @@ -233,7 +237,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache, }; int ret; - trace_cachefiles_unlink(object, dentry, why); + trace_cachefiles_unlink(object, d_inode(dentry)->i_ino, why); ret = security_path_unlink(&path, dentry); if (ret < 0) { cachefiles_io_error(cache, "Unlink security error"); @@ -386,7 +390,7 @@ try_again: .new_dir = d_inode(cache->graveyard), .new_dentry = grave, }; - trace_cachefiles_rename(object, rep, grave, why); + trace_cachefiles_rename(object, d_inode(rep)->i_ino, why); ret = cachefiles_inject_read_error(); if (ret == 0) ret = vfs_rename(&rd); @@ -617,7 +621,7 @@ bool cachefiles_look_up_object(struct cachefiles_object *object) object->d_name_len); else dentry = ERR_PTR(ret); - trace_cachefiles_lookup(object, dentry); + trace_cachefiles_lookup(object, fan, dentry); if (IS_ERR(dentry)) { if (dentry == ERR_PTR(-ENOENT)) goto new_file; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index b3d9459c9bbd61c1d9c76dec2a1a8f902994bf34..c98e5238a1b6ae16dccaed2c4a4e92589329ccfa 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -297,10 +297,6 @@ out: dout("%s: result %d\n", __func__, err); } -static void ceph_init_rreq(struct netfs_read_request *rreq, struct file *file) -{ -} - static void ceph_readahead_cleanup(struct address_space *mapping, void *priv) { struct inode *inode = mapping->host; @@ -312,7 +308,6 @@ static void ceph_readahead_cleanup(struct address_space *mapping, void *priv) } static const struct netfs_read_request_ops ceph_netfs_read_ops = { - .init_rreq = ceph_init_rreq, .is_cache_enabled = ceph_is_cache_enabled, .begin_cache_operation = ceph_begin_cache_operation, .issue_op = ceph_netfs_issue_op, diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7d305b97482464e176cabb7a0aa51119baa4d697..b472cd066d1c8f86ea3e13a6538b773cd9bb946f 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2218,6 +2218,7 @@ static int 
unsafe_request_wait(struct inode *inode) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_mds_request *req1 = NULL, *req2 = NULL; + unsigned int max_sessions; int ret, err = 0; spin_lock(&ci->i_unsafe_lock); @@ -2235,37 +2236,45 @@ static int unsafe_request_wait(struct inode *inode) } spin_unlock(&ci->i_unsafe_lock); + /* + * The mdsc->max_sessions is unlikely to change, but if it does we + * retry by reallocating the sessions array memory, which lets us + * avoid holding the mdsc->mutex lock. + */ +retry: + max_sessions = mdsc->max_sessions; + /* * Trigger to flush the journal logs in all the relevant MDSes * manually, or in the worst case we must wait at most 5 seconds * to wait the journal logs to be flushed by the MDSes periodically. */ - if (req1 || req2) { + if ((req1 || req2) && likely(max_sessions)) { struct ceph_mds_session **sessions = NULL; struct ceph_mds_session *s; struct ceph_mds_request *req; - unsigned int max; int i; - /* - * The mdsc->max_sessions is unlikely to be changed - * mostly, here we will retry it by reallocating the - * sessions arrary memory to get rid of the mdsc->mutex - * lock. - */ -retry: - max = mdsc->max_sessions; - sessions = krealloc(sessions, max * sizeof(s), __GFP_ZERO); - if (!sessions) - return -ENOMEM; + sessions = kzalloc(max_sessions * sizeof(s), GFP_KERNEL); + if (!sessions) { + err = -ENOMEM; + goto out; + } spin_lock(&ci->i_unsafe_lock); if (req1) { list_for_each_entry(req, &ci->i_unsafe_dirops, r_unsafe_dir_item) { s = req->r_session; - if (unlikely(s->s_mds >= max)) { + if (unlikely(s->s_mds >= max_sessions)) { spin_unlock(&ci->i_unsafe_lock); + for (i = 0; i < max_sessions; i++) { + s = sessions[i]; + if (s) + ceph_put_mds_session(s); + } + kfree(sessions); goto retry; } if (!sessions[s->s_mds]) { @@ -2278,8 +2287,14 @@ retry: list_for_each_entry(req, &ci->i_unsafe_iops, r_unsafe_target_item) { s = req->r_session; - if (unlikely(s->s_mds >= max)) { + if (unlikely(s->s_mds >= max_sessions)) { spin_unlock(&ci->i_unsafe_lock); + for (i = 0; i < max_sessions; i++) { + s = sessions[i]; + if (s) + ceph_put_mds_session(s); + } + kfree(sessions); goto retry; } if (!sessions[s->s_mds]) { @@ -2300,7 +2315,7 @@ retry: spin_unlock(&ci->i_ceph_lock); /* send flush mdlog request to MDSes */ - for (i = 0; i < max; i++) { + for (i = 0; i < max_sessions; i++) { s = sessions[i]; if (s) { send_flush_mdlog(s); @@ -2317,15 +2332,19 @@ retry: ceph_timeout_jiffies(req1->r_timeout)); if (ret) err = -EIO; - ceph_mdsc_put_request(req1); } if (req2) { ret = !wait_for_completion_timeout(&req2->r_safe_completion, ceph_timeout_jiffies(req2->r_timeout)); if (ret) err = -EIO; - ceph_mdsc_put_request(req2); } + +out: + if (req1) + ceph_mdsc_put_request(req1); + if (req2) + ceph_mdsc_put_request(req2); return err; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 5b9104b8e453958bd982341c77a125f9e90e8771..bbed3224ad689c15fa3b288c3536eb64cc4f9935 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -583,6 +583,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, struct ceph_inode_info *ci = ceph_inode(dir); struct inode *inode; struct timespec64 now; + struct ceph_string *pool_ns; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_vino vino = { .ino = req->r_deleg_ino, .snap = CEPH_NOSNAP }; @@ -632,6 +633,12 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, in.max_size = cpu_to_le64(lo->stripe_unit);
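/* Convert the in-memory file layout to the legacy on-wire format. */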
ceph_file_layout_to_legacy(lo, &in.layout); + /* lo is private, so pool_ns can't change */ + pool_ns = rcu_dereference_raw(lo->pool_ns); + if (pool_ns) { + iinfo.pool_ns_len = pool_ns->len; + iinfo.pool_ns_data = pool_ns->str; + } down_read(&mdsc->snap_rwsem); ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session, @@ -750,8 +757,10 @@ retry: restore_deleg_ino(dir, req->r_deleg_ino); ceph_mdsc_put_request(req); try_async = false; + ceph_put_string(rcu_dereference_raw(lo.pool_ns)); goto retry; } + ceph_put_string(rcu_dereference_raw(lo.pool_ns)); goto out_req; } } diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index 346ae8716deb8cd9210d22c52d8e31cc1eb2ab6c..3b7e3b9e4fd2e00a9be441ad6713b896986068c1 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -188,7 +188,7 @@ config CIFS_SMB_DIRECT config CIFS_FSCACHE bool "Provide CIFS client caching support" - depends on CIFS=m && FSCACHE_OLD_API || CIFS=y && FSCACHE_OLD_API=y + depends on CIFS=m && FSCACHE || CIFS=y && FSCACHE=y help Makes CIFS FS-Cache capable. Say Y here if you want your CIFS data to be cached locally on disk through the general filesystem cache diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 87fcacdf3de73a52ba97738711808b6e91a44007..cc8fdcb35b7178296cfa7d2c44bbb9f6fa8b3c2c 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -25,7 +25,7 @@ cifs-$(CONFIG_CIFS_DFS_UPCALL) += cifs_dfs_ref.o dfs_cache.o cifs-$(CONFIG_CIFS_SWN_UPCALL) += netlink.o cifs_swn.o -cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o +cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c deleted file mode 100644 index 8be57aaedab60b7cb211aaec7dd2a3cb1b3dd1e8..0000000000000000000000000000000000000000 --- a/fs/cifs/cache.c +++ /dev/null @@ -1,105 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 -/* - * CIFS filesystem cache index structure definitions - * - * Copyright (c) 2010 Novell, Inc. 
- * Authors(s): Suresh Jayaraman (sjayaraman@suse.de> - * - */ -#include "fscache.h" -#include "cifs_debug.h" - -/* - * CIFS filesystem definition for FS-Cache - */ -struct fscache_netfs cifs_fscache_netfs = { - .name = "cifs", - .version = 0, -}; - -/* - * Register CIFS for caching with FS-Cache - */ -int cifs_fscache_register(void) -{ - return fscache_register_netfs(&cifs_fscache_netfs); -} - -/* - * Unregister CIFS for caching - */ -void cifs_fscache_unregister(void) -{ - fscache_unregister_netfs(&cifs_fscache_netfs); -} - -/* - * Server object for FS-Cache - */ -const struct fscache_cookie_def cifs_fscache_server_index_def = { - .name = "CIFS.server", - .type = FSCACHE_COOKIE_TYPE_INDEX, -}; - -static enum -fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data, - const void *data, - uint16_t datalen, - loff_t object_size) -{ - struct cifs_fscache_super_auxdata auxdata; - const struct cifs_tcon *tcon = cookie_netfs_data; - - if (datalen != sizeof(auxdata)) - return FSCACHE_CHECKAUX_OBSOLETE; - - memset(&auxdata, 0, sizeof(auxdata)); - auxdata.resource_id = tcon->resource_id; - auxdata.vol_create_time = tcon->vol_create_time; - auxdata.vol_serial_number = tcon->vol_serial_number; - - if (memcmp(data, &auxdata, datalen) != 0) - return FSCACHE_CHECKAUX_OBSOLETE; - - return FSCACHE_CHECKAUX_OKAY; -} - -/* - * Superblock object for FS-Cache - */ -const struct fscache_cookie_def cifs_fscache_super_index_def = { - .name = "CIFS.super", - .type = FSCACHE_COOKIE_TYPE_INDEX, - .check_aux = cifs_fscache_super_check_aux, -}; - -static enum -fscache_checkaux cifs_fscache_inode_check_aux(void *cookie_netfs_data, - const void *data, - uint16_t datalen, - loff_t object_size) -{ - struct cifs_fscache_inode_auxdata auxdata; - struct cifsInodeInfo *cifsi = cookie_netfs_data; - - if (datalen != sizeof(auxdata)) - return FSCACHE_CHECKAUX_OBSOLETE; - - memset(&auxdata, 0, sizeof(auxdata)); - auxdata.eof = cifsi->server_eof; - auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec; - auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec; - auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec; - auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec; - - if (memcmp(data, &auxdata, datalen) != 0) - return FSCACHE_CHECKAUX_OBSOLETE; - - return FSCACHE_CHECKAUX_OKAY; -} - -const struct fscache_cookie_def cifs_fscache_inode_object_def = { - .name = "CIFS.uniqueid", - .type = FSCACHE_COOKIE_TYPE_DATAFILE, - .check_aux = cifs_fscache_inode_check_aux, -}; diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c index 8f386dd9939ea94b4f15c667c7b22d1ceceaf7eb..cdce1609c5c26afefee1ce0b81a138d434495d93 100644 --- a/fs/cifs/cifs_swn.c +++ b/fs/cifs/cifs_swn.c @@ -396,11 +396,11 @@ static int cifs_swn_resource_state_changed(struct cifs_swn_reg *swnreg, const ch switch (state) { case CIFS_SWN_RESOURCE_STATE_UNAVAILABLE: cifs_dbg(FYI, "%s: resource name '%s' become unavailable\n", __func__, name); - cifs_ses_mark_for_reconnect(swnreg->tcon->ses); + cifs_mark_tcp_ses_conns_for_reconnect(swnreg->tcon->ses->server, true); break; case CIFS_SWN_RESOURCE_STATE_AVAILABLE: cifs_dbg(FYI, "%s: resource name '%s' become available\n", __func__, name); - cifs_ses_mark_for_reconnect(swnreg->tcon->ses); + cifs_mark_tcp_ses_conns_for_reconnect(swnreg->tcon->ses->server, true); break; case CIFS_SWN_RESOURCE_STATE_UNKNOWN: cifs_dbg(FYI, "%s: resource name '%s' changed to unknown state\n", __func__, name); @@ -498,10 +498,7 @@ static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct 
sockaddr_storage *a goto unlock; } - spin_lock(&cifs_tcp_ses_lock); - if (tcon->ses->server->tcpStatus != CifsExiting) - tcon->ses->server->tcpStatus = CifsNeedReconnect; - spin_unlock(&cifs_tcp_ses_lock); + cifs_mark_tcp_ses_conns_for_reconnect(tcon->ses->server, false); unlock: mutex_unlock(&tcon->ses->server->srv_mutex); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 36b2e0cb9736360cc4afcbfa6ff426e8b401ecfd..199edac0cb59f85b14971175010aea2069c5c016 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -397,6 +397,9 @@ static void cifs_evict_inode(struct inode *inode) { truncate_inode_pages_final(&inode->i_data); + if (inode->i_state & I_PINNING_FSCACHE_WB) + cifs_fscache_unuse_inode_cookie(inode, true); + cifs_fscache_release_inode_cookie(inode); clear_inode(inode); } @@ -721,6 +724,12 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root) } #endif +static int cifs_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + fscache_unpin_writeback(wbc, cifs_inode_cookie(inode)); + return 0; +} + static int cifs_drop_inode(struct inode *inode) { struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); @@ -733,6 +742,7 @@ static int cifs_drop_inode(struct inode *inode) static const struct super_operations cifs_super_ops = { .statfs = cifs_statfs, .alloc_inode = cifs_alloc_inode, + .write_inode = cifs_write_inode, .free_inode = cifs_free_inode, .drop_inode = cifs_drop_inode, .evict_inode = cifs_evict_inode, @@ -1625,13 +1635,9 @@ init_cifs(void) goto out_destroy_cifsoplockd_wq; } - rc = cifs_fscache_register(); - if (rc) - goto out_destroy_deferredclose_wq; - rc = cifs_init_inodecache(); if (rc) - goto out_unreg_fscache; + goto out_destroy_deferredclose_wq; rc = cifs_init_mids(); if (rc) @@ -1693,8 +1699,6 @@ out_destroy_mids: cifs_destroy_mids(); out_destroy_inodecache: cifs_destroy_inodecache(); -out_unreg_fscache: - cifs_fscache_unregister(); out_destroy_deferredclose_wq: destroy_workqueue(deferredclose_wq); out_destroy_cifsoplockd_wq: @@ -1730,7 +1734,6 @@ exit_cifs(void) cifs_destroy_request_bufs(); cifs_destroy_mids(); cifs_destroy_inodecache(); - cifs_fscache_unregister(); destroy_workqueue(deferredclose_wq); destroy_workqueue(cifsoplockd_wq); destroy_workqueue(decrypt_wq); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 9e5d9e192ef042202c079f204e157c6203c2f025..15a5c5db038b8230cd36afab81274096f1e21db2 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -152,5 +152,6 @@ extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type, extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.34" +#define SMB3_PRODUCT_BUILD 35 +#define CIFS_VERSION "2.35" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index f84978b76bb6b6800569e38f952a883d7f21b569..48b343d0343091fa18259b8b8bf568268262b848 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -117,6 +117,7 @@ enum statusEnum { CifsInSessSetup, CifsNeedTcon, CifsInTcon, + CifsNeedFilesInvalidate, CifsInFilesInvalidate }; @@ -667,9 +668,6 @@ struct TCP_Server_Info { unsigned int total_read; /* total amount of data read in this pass */ atomic_t in_send; /* requests trying to send */ atomic_t num_waiters; /* blocked waiting to get in sendrecv */ -#ifdef CONFIG_CIFS_FSCACHE - struct fscache_cookie *fscache; /* client index cache cookie */ -#endif #ifdef CONFIG_CIFS_STATS2 atomic_t num_cmds[NUMBER_OF_SMB2_COMMANDS]; /* total requests by cmd */ atomic_t smb2slowcmd[NUMBER_OF_SMB2_COMMANDS]; /* count 
resps > 1 sec */ @@ -923,6 +921,7 @@ struct cifs_chan { */ struct cifs_ses { struct list_head smb_ses_list; + struct list_head rlist; /* reconnect list */ struct list_head tcon_list; struct cifs_tcon *tcon_ipc; struct mutex session_mutex; @@ -1110,7 +1109,7 @@ struct cifs_tcon { __u32 max_bytes_copy; #ifdef CONFIG_CIFS_FSCACHE u64 resource_id; /* server resource id */ - struct fscache_cookie *fscache; /* cookie for share */ + struct fscache_volume *fscache; /* cookie for share */ #endif struct list_head pending_opens; /* list of incomplete opens */ struct cached_fid crfid; /* Cached root fid */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index e0dc147e69a85820b2c2a60cbba8496b2ab76cd8..d3701295402d2dc6b91913cb0484ca8fc65367d4 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -131,6 +131,9 @@ extern int SendReceiveBlockingLock(const unsigned int xid, struct smb_hdr *in_buf , struct smb_hdr *out_buf, int *bytes_returned); +void +cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, + bool mark_smb_session); extern int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session); extern int checkSMB(char *buf, unsigned int len, struct TCP_Server_Info *srvr); @@ -647,6 +650,11 @@ static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, int match_target_ip(struct TCP_Server_Info *server, const char *share, size_t share_len, bool *result); + +int cifs_dfs_query_info_nonascii_quirk(const unsigned int xid, + struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, + const char *dfs_link_path); #endif static inline int cifs_create_options(struct cifs_sb_info *cifs_sb, int options) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0f36deff790ec53f5460d2d9d729507861ce0648..053cb449eb167ec415ba13dc3e5cfb97fa4e6428 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -162,43 +162,40 @@ static void cifs_resolve_server(struct work_struct *work) mutex_unlock(&server->srv_mutex); } -/** +/* * Mark all sessions and tcons for reconnect. * * @server needs to be previously set to CifsNeedReconnect. * */ -static void +void cifs_mark_tcp_ses_conns_for_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) { struct TCP_Server_Info *pserver; struct cifs_ses *ses; struct cifs_tcon *tcon; - struct mid_q_entry *mid, *nmid; - struct list_head retry_list; - server->maxBuf = 0; - server->max_read = 0; - - cifs_dbg(FYI, "Mark tcp session as need reconnect\n"); - trace_smb3_reconnect(server->CurrentMid, server->conn_id, server->hostname); /* * before reconnecting the tcp session, mark the smb session (uid) and the tid bad so they * are not used until reconnected. */ - cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect\n", __func__); + cifs_dbg(FYI, "%s: marking necessary sessions and tcons for reconnect\n", __func__); /* If server is a channel, select the primary channel */ pserver = CIFS_SERVER_IS_CHAN(server) ? 
server->primary_server : server; + spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { spin_lock(&ses->chan_lock); if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server)) goto next_session; - cifs_chan_set_need_reconnect(ses, server); + if (mark_smb_session) + CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses); + else + cifs_chan_set_need_reconnect(ses, server); /* If all channels need reconnect, then tcon needs reconnect */ if (!mark_smb_session && !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) @@ -217,14 +214,19 @@ next_session: spin_unlock(&ses->chan_lock); } spin_unlock(&cifs_tcp_ses_lock); +} + +static void +cifs_abort_connection(struct TCP_Server_Info *server) +{ + struct mid_q_entry *mid, *nmid; + struct list_head retry_list; + + server->maxBuf = 0; + server->max_read = 0; - /* - * before reconnecting the tcp session, mark the smb session (uid) - * and the tid bad so they are not used until reconnected - */ - cifs_dbg(FYI, "%s: marking sessions and tcons for reconnect and tearing down socket\n", - __func__); /* do not want to be sending data on a socket we are freeing */ + cifs_dbg(FYI, "%s: tearing down socket\n", __func__); mutex_lock(&server->srv_mutex); if (server->ssocket) { cifs_dbg(FYI, "State: 0x%x Flags: 0x%lx\n", server->ssocket->state, @@ -280,7 +282,12 @@ static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num wake_up(&server->response_q); return false; } + + cifs_dbg(FYI, "Mark tcp session as need reconnect\n"); + trace_smb3_reconnect(server->CurrentMid, server->conn_id, + server->hostname); server->tcpStatus = CifsNeedReconnect; + spin_unlock(&cifs_tcp_ses_lock); return true; } @@ -308,6 +315,8 @@ static int __cifs_reconnect(struct TCP_Server_Info *server, cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session); + cifs_abort_connection(server); + do { try_to_freeze(); mutex_lock(&server->srv_mutex); @@ -335,11 +344,14 @@ static int __cifs_reconnect(struct TCP_Server_Info *server, spin_unlock(&cifs_tcp_ses_lock); cifs_swn_reset_server_dstaddr(server); mutex_unlock(&server->srv_mutex); + mod_delayed_work(cifsiod_wq, &server->reconnect, 0); } } while (server->tcpStatus == CifsNeedReconnect); + spin_lock(&cifs_tcp_ses_lock); if (server->tcpStatus == CifsNeedNegotiate) mod_delayed_work(cifsiod_wq, &server->echo, 0); + spin_unlock(&cifs_tcp_ses_lock); wake_up(&server->response_q); return rc; @@ -429,6 +441,8 @@ reconnect_dfs_server(struct TCP_Server_Info *server, cifs_mark_tcp_ses_conns_for_reconnect(server, mark_smb_session); + cifs_abort_connection(server); + do { try_to_freeze(); mutex_lock(&server->srv_mutex); @@ -454,6 +468,7 @@ reconnect_dfs_server(struct TCP_Server_Info *server, spin_unlock(&cifs_tcp_ses_lock); cifs_swn_reset_server_dstaddr(server); mutex_unlock(&server->srv_mutex); + mod_delayed_work(cifsiod_wq, &server->reconnect, 0); } while (server->tcpStatus == CifsNeedReconnect); if (target_hint) @@ -1439,10 +1454,6 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) cifs_crypto_secmech_release(server); - /* fscache server cookies are based on primary channel only */ - if (!CIFS_SERVER_IS_CHAN(server)) - cifs_fscache_release_client_cookie(server); - kfree(server->session_key.response); server->session_key.response = NULL; server->session_key.len = 0; @@ -1604,14 +1615,6 @@ smbd_connected: list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list); spin_unlock(&cifs_tcp_ses_lock); - /* fscache server cookies are based on primary channel only */ - if (!CIFS_SERVER_IS_CHAN(tcp_ses)) - 
cifs_fscache_get_client_cookie(tcp_ses); -#ifdef CONFIG_CIFS_FSCACHE - else - tcp_ses->fscache = tcp_ses->primary_server->fscache; -#endif /* CONFIG_CIFS_FSCACHE */ - /* queue echo request delayed work */ queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval); @@ -1832,23 +1835,19 @@ void cifs_put_smb_ses(struct cifs_ses *ses) spin_lock(&ses->chan_lock); chan_count = ses->chan_count; - spin_unlock(&ses->chan_lock); /* close any extra channels */ if (chan_count > 1) { int i; for (i = 1; i < chan_count; i++) { - /* - * note: for now, we're okay accessing ses->chans - * without chan_lock. But when chans can go away, we'll - * need to introduce ref counting to make sure that chan - * is not freed from under us. - */ + spin_unlock(&ses->chan_lock); cifs_put_tcp_session(ses->chans[i].server, 0); + spin_lock(&ses->chan_lock); ses->chans[i].server = NULL; } } + spin_unlock(&ses->chan_lock); sesInfoFree(ses); cifs_put_tcp_session(server, 0); @@ -1988,6 +1987,19 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) } } + ctx->workstation_name = kstrdup(ses->workstation_name, GFP_KERNEL); + if (!ctx->workstation_name) { + cifs_dbg(FYI, "Unable to allocate memory for workstation_name\n"); + rc = -ENOMEM; + kfree(ctx->username); + ctx->username = NULL; + kfree_sensitive(ctx->password); + ctx->password = NULL; + kfree(ctx->domainname); + ctx->domainname = NULL; + goto out_key_put; + } + out_key_put: up_read(&key->sem); key_put(key); @@ -2124,8 +2136,10 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) mutex_unlock(&ses->session_mutex); /* each channel uses a different signing key */ + spin_lock(&ses->chan_lock); memcpy(ses->chans[0].signkey, ses->smb3signingkey, sizeof(ses->smb3signingkey)); + spin_unlock(&ses->chan_lock); if (rc) goto get_ses_fail; @@ -2336,10 +2350,19 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) if (ses->server->posix_ext_supported) { tcon->posix_extensions = true; pr_warn_once("SMB3.11 POSIX Extensions are experimental\n"); - } else { + } else if ((ses->server->vals->protocol_id == SMB311_PROT_ID) || + (strcmp(ses->server->vals->version_string, + SMB3ANY_VERSION_STRING) == 0) || + (strcmp(ses->server->vals->version_string, + SMBDEFAULT_VERSION_STRING) == 0)) { cifs_dbg(VFS, "Server does not support mounting with posix SMB3.11 extensions\n"); rc = -EOPNOTSUPP; goto out_fail; + } else { + cifs_dbg(VFS, "Check vers= mount option. SMB3.11 " + "disabled but required for POSIX extensions\n"); + rc = -EOPNOTSUPP; + goto out_fail; } } @@ -3121,7 +3144,8 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx) * Inside cifs_fscache_get_super_cookie it checks * that we do not get super cookie twice. 
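A note on the cifs_put_smb_ses() hunk above: cifs_put_tcp_session() can sleep, so it must not be called under ses->chan_lock. The patch therefore drops the spinlock around each call and retakes it before clearing the channel slot. A minimal sketch of that pattern, condensed from the hunk (chan_count is sampled under the lock; nothing else mutates ses->chans[] concurrently):

	spin_lock(&ses->chan_lock);
	chan_count = ses->chan_count;
	for (i = 1; i < chan_count; i++) {
		spin_unlock(&ses->chan_lock);	/* the put below may sleep */
		cifs_put_tcp_session(ses->chans[i].server, 0);
		spin_lock(&ses->chan_lock);	/* re-protect chans[] */
		ses->chans[i].server = NULL;
	}
	spin_unlock(&ses->chan_lock);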
*/ - cifs_fscache_get_super_cookie(tcon); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) + cifs_fscache_get_super_cookie(tcon); out: mnt_ctx->server = server; @@ -3374,6 +3398,11 @@ static int is_path_remote(struct mount_ctx *mnt_ctx) rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, full_path); +#ifdef CONFIG_CIFS_DFS_UPCALL + if (rc == -ENOENT && is_tcon_dfs(tcon)) + rc = cifs_dfs_query_info_nonascii_quirk(xid, tcon, cifs_sb, + full_path); +#endif if (rc != 0 && rc != -EREMOTE) { kfree(full_path); return rc; @@ -3761,10 +3790,6 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses, if (rc == 0) { bool is_unicode; - spin_lock(&cifs_tcp_ses_lock); - tcon->tidStatus = CifsGood; - spin_unlock(&cifs_tcp_ses_lock); - tcon->need_reconnect = false; tcon->tid = smb_buffer_response->Tid; bcc_ptr = pByteArea(smb_buffer_response); bytes_left = get_bcc(smb_buffer_response); @@ -3879,6 +3904,11 @@ cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses, else rc = -EHOSTDOWN; spin_unlock(&cifs_tcp_ses_lock); + } else { + spin_lock(&cifs_tcp_ses_lock); + if (server->tcpStatus == CifsInNegotiate) + server->tcpStatus = CifsNeedNegotiate; + spin_unlock(&cifs_tcp_ses_lock); } return rc; @@ -3898,7 +3928,7 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, spin_unlock(&cifs_tcp_ses_lock); return 0; } - ses->status = CifsInSessSetup; + server->tcpStatus = CifsInSessSetup; spin_unlock(&cifs_tcp_ses_lock); spin_lock(&ses->chan_lock); @@ -3925,8 +3955,24 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses, if (server->ops->sess_setup) rc = server->ops->sess_setup(xid, ses, server, nls_info); - if (rc) + if (rc) { cifs_server_dbg(VFS, "Send error in SessSetup = %d\n", rc); + spin_lock(&cifs_tcp_ses_lock); + if (server->tcpStatus == CifsInSessSetup) + server->tcpStatus = CifsNeedSessSetup; + spin_unlock(&cifs_tcp_ses_lock); + } else { + spin_lock(&cifs_tcp_ses_lock); + if (server->tcpStatus == CifsInSessSetup) + server->tcpStatus = CifsGood; + /* Even if one channel is active, session is in good state */ + ses->status = CifsGood; + spin_unlock(&cifs_tcp_ses_lock); + + spin_lock(&ses->chan_lock); + cifs_chan_clear_need_reconnect(ses, server); + spin_unlock(&ses->chan_lock); + } return rc; } @@ -4271,17 +4317,6 @@ static int __tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *t struct dfs_cache_tgt_iterator *tit; bool target_match; - /* only send once per connect */ - spin_lock(&cifs_tcp_ses_lock); - if (tcon->ses->status != CifsGood || - (tcon->tidStatus != CifsNew && - tcon->tidStatus != CifsNeedTcon)) { - spin_unlock(&cifs_tcp_ses_lock); - return 0; - } - tcon->tidStatus = CifsInTcon; - spin_unlock(&cifs_tcp_ses_lock); - extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len); tit = dfs_cache_get_tgt_iterator(tl); @@ -4381,7 +4416,7 @@ static int tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tco */ if (rc && server->current_fullpath != server->origin_fullpath) { server->current_fullpath = server->origin_fullpath; - cifs_ses_mark_for_reconnect(tcon->ses); + cifs_reconnect(tcon->ses->server, true); } dfs_cache_free_tgts(tl); @@ -4399,9 +4434,22 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru char *tree; struct dfs_info3_param ref = {0}; + /* only send once per connect */ + spin_lock(&cifs_tcp_ses_lock); + if (tcon->ses->status != CifsGood || + (tcon->tidStatus != CifsNew && + tcon->tidStatus != CifsNeedTcon)) { + spin_unlock(&cifs_tcp_ses_lock); + return 0; + } + 
tcon->tidStatus = CifsInTcon; + spin_unlock(&cifs_tcp_ses_lock); + tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL); - if (!tree) - return -ENOMEM; + if (!tree) { + rc = -ENOMEM; + goto out; + } if (tcon->ipc) { scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); @@ -4433,11 +4481,25 @@ out: kfree(tree); cifs_put_tcp_super(sb); + if (rc) { + spin_lock(&cifs_tcp_ses_lock); + if (tcon->tidStatus == CifsInTcon) + tcon->tidStatus = CifsNeedTcon; + spin_unlock(&cifs_tcp_ses_lock); + } else { + spin_lock(&cifs_tcp_ses_lock); + if (tcon->tidStatus == CifsInTcon) + tcon->tidStatus = CifsGood; + spin_unlock(&cifs_tcp_ses_lock); + tcon->need_reconnect = false; + } + return rc; } #else int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc) { + int rc; const struct smb_version_operations *ops = tcon->ses->server->ops; /* only send once per connect */ @@ -4451,6 +4513,20 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru tcon->tidStatus = CifsInTcon; spin_unlock(&cifs_tcp_ses_lock); - return ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc); + rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc); + if (rc) { + spin_lock(&cifs_tcp_ses_lock); + if (tcon->tidStatus == CifsInTcon) + tcon->tidStatus = CifsNeedTcon; + spin_unlock(&cifs_tcp_ses_lock); + } else { + spin_lock(&cifs_tcp_ses_lock); + if (tcon->tidStatus == CifsInTcon) + tcon->tidStatus = CifsGood; + spin_unlock(&cifs_tcp_ses_lock); + tcon->need_reconnect = false; + } + + return rc; } #endif diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index e9b0fa2a9614a4e63ba767456d868f8b276192a9..831f42458bf6da5fd77431084df8c017bb747eba 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -1355,7 +1355,7 @@ static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cach } cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__); - cifs_ses_mark_for_reconnect(tcon->ses); + cifs_mark_tcp_ses_conns_for_reconnect(tcon->ses->server, true); } /* Refresh dfs referral of tcon and mark it for reconnect if needed */ diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 6e8e7cc26ae24926164a9ceed85dc676ddcc53a2..ce9b22aecfbad9f02bc76b92376288d0efa486a8 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -22,6 +22,7 @@ #include "cifs_unicode.h" #include "fs_context.h" #include "cifs_ioctl.h" +#include "fscache.h" static void renew_parental_timestamps(struct dentry *direntry) @@ -507,8 +508,12 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, server->ops->close(xid, tcon, &fid); cifs_del_pending_open(&open); rc = -ENOMEM; + goto out; } + fscache_use_cookie(cifs_inode_cookie(file_inode(file)), + file->f_mode & FMODE_WRITE); + out: cifs_put_tlink(tlink); out_free_xid: diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 9fee3af83a7369ba7d748ca3b36c66d0f7808cfc..e7af802dcfa600293aa6bf58539dd769892ac6ff 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -376,8 +376,6 @@ static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file) struct cifsLockInfo *li, *tmp; struct super_block *sb = inode->i_sb; - cifs_fscache_release_inode_cookie(inode); - /* * Delete any outstanding lock records. We'll lose them when the file * is closed anyway. 
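The cifs_tree_connect() hunks above pull the "only send once per connect" guard out of the DFS helper and add an explicit commit/roll-back of the tid status around the request, for both the DFS and non-DFS variants. Condensed from those hunks (do_tree_connect() is a placeholder for the real ops->tree_connect() path):

	/*
	 * tidStatus transitions in cifs_tree_connect():
	 *
	 *   CifsNew/CifsNeedTcon -> CifsInTcon -> CifsGood      (success)
	 *                                     \-> CifsNeedTcon  (failure)
	 */
	spin_lock(&cifs_tcp_ses_lock);
	if (tcon->ses->status != CifsGood ||
	    (tcon->tidStatus != CifsNew && tcon->tidStatus != CifsNeedTcon)) {
		spin_unlock(&cifs_tcp_ses_lock);
		return 0;		/* nothing to do on this connect */
	}
	tcon->tidStatus = CifsInTcon;
	spin_unlock(&cifs_tcp_ses_lock);

	rc = do_tree_connect();		/* placeholder */

	spin_lock(&cifs_tcp_ses_lock);
	if (tcon->tidStatus == CifsInTcon)
		tcon->tidStatus = rc ? CifsNeedTcon : CifsGood;
	spin_unlock(&cifs_tcp_ses_lock);
	if (!rc)
		tcon->need_reconnect = false;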
@@ -570,7 +568,7 @@ int cifs_open(struct inode *inode, struct file *file) spin_lock(&CIFS_I(inode)->deferred_lock); cifs_del_deferred_close(cfile); spin_unlock(&CIFS_I(inode)->deferred_lock); - goto out; + goto use_cache; } else { _cifsFileInfo_put(cfile, true, false); } @@ -632,8 +630,6 @@ int cifs_open(struct inode *inode, struct file *file) goto out; } - cifs_fscache_set_inode_cookie(inode, file); - if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) { /* * Time to set mode which we can not set earlier due to @@ -652,6 +648,15 @@ int cifs_open(struct inode *inode, struct file *file) cfile->pid); } +use_cache: + fscache_use_cookie(cifs_inode_cookie(file_inode(file)), + file->f_mode & FMODE_WRITE); + if (file->f_flags & O_DIRECT && + (!((file->f_flags & O_ACCMODE) != O_RDONLY) || + file->f_flags & O_APPEND)) + cifs_invalidate_cache(file_inode(file), + FSCACHE_INVAL_DIO_WRITE); + out: free_dentry_path(page); free_xid(xid); @@ -876,6 +881,8 @@ int cifs_close(struct inode *inode, struct file *file) struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifs_deferred_close *dclose; + cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE); + if (file->private_data != NULL) { cfile = file->private_data; file->private_data = NULL; @@ -886,7 +893,6 @@ int cifs_close(struct inode *inode, struct file *file) dclose) { if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) { inode->i_ctime = inode->i_mtime = current_time(inode); - cifs_fscache_update_inode_cookie(inode); } spin_lock(&cinode->deferred_lock); cifs_add_deferred_close(cfile, dclose); @@ -4198,10 +4204,12 @@ static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf) { struct page *page = vmf->page; - struct file *file = vmf->vma->vm_file; - struct inode *inode = file_inode(file); - cifs_fscache_wait_on_page_write(inode, page); +#ifdef CONFIG_CIFS_FSCACHE + if (PageFsCache(page) && + wait_on_page_fscache_killable(page) < 0) + return VM_FAULT_RETRY; +#endif lock_page(page); return VM_FAULT_LOCKED; @@ -4261,8 +4269,6 @@ cifs_readv_complete(struct work_struct *work) for (i = 0; i < rdata->nr_pages; i++) { struct page *page = rdata->pages[i]; - lru_cache_add(page); - if (rdata->result == 0 || (rdata->result == -EAGAIN && got_bytes)) { flush_dcache_page(page); @@ -4270,13 +4276,11 @@ cifs_readv_complete(struct work_struct *work) } else SetPageError(page); - unlock_page(page); - if (rdata->result == 0 || (rdata->result == -EAGAIN && got_bytes)) cifs_readpage_to_fscache(rdata->mapping->host, page); - else - cifs_fscache_uncache_page(rdata->mapping->host, page); + + unlock_page(page); got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes); @@ -4334,7 +4338,6 @@ readpages_fill_pages(struct TCP_Server_Info *server, * fill them until the writes are flushed. 
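Taken together, the open/close hunks above and the iget/evict_inode hunks earlier in the series give each inode's cookie a pin-and-release lifecycle in place of the old set/update/reset calls. Roughly, with error handling omitted:

	/* acquire: when the inode is instantiated (cifs_iget) */
	cifs_fscache_get_inode_cookie(inode);

	/* pin: on open; FMODE_WRITE marks the cookie in use for writing */
	fscache_use_cookie(cifs_inode_cookie(inode),
			   file->f_mode & FMODE_WRITE);

	/* unpin: on close; 'update' pushes fresh coherency data to the cache */
	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);

	/* relinquish: when the inode is evicted (cifs_evict_inode) */
	cifs_fscache_release_inode_cookie(inode);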
*/ zero_user(page, 0, PAGE_SIZE); - lru_cache_add(page); flush_dcache_page(page); SetPageUptodate(page); unlock_page(page); @@ -4344,7 +4347,6 @@ readpages_fill_pages(struct TCP_Server_Info *server, continue; } else { /* no need to hold page hostage */ - lru_cache_add(page); unlock_page(page); put_page(page); rdata->pages[i] = NULL; @@ -4387,92 +4389,20 @@ cifs_readpages_copy_into_pages(struct TCP_Server_Info *server, return readpages_fill_pages(server, rdata, iter, iter->count); } -static int -readpages_get_pages(struct address_space *mapping, struct list_head *page_list, - unsigned int rsize, struct list_head *tmplist, - unsigned int *nr_pages, loff_t *offset, unsigned int *bytes) +static void cifs_readahead(struct readahead_control *ractl) { - struct page *page, *tpage; - unsigned int expected_index; int rc; - gfp_t gfp = readahead_gfp_mask(mapping); - - INIT_LIST_HEAD(tmplist); - - page = lru_to_page(page_list); - - /* - * Lock the page and put it in the cache. Since no one else - * should have access to this page, we're safe to simply set - * PG_locked without checking it first. - */ - __SetPageLocked(page); - rc = add_to_page_cache_locked(page, mapping, - page->index, gfp); - - /* give up if we can't stick it in the cache */ - if (rc) { - __ClearPageLocked(page); - return rc; - } - - /* move first page to the tmplist */ - *offset = (loff_t)page->index << PAGE_SHIFT; - *bytes = PAGE_SIZE; - *nr_pages = 1; - list_move_tail(&page->lru, tmplist); - - /* now try and add more pages onto the request */ - expected_index = page->index + 1; - list_for_each_entry_safe_reverse(page, tpage, page_list, lru) { - /* discontinuity ? */ - if (page->index != expected_index) - break; - - /* would this page push the read over the rsize? */ - if (*bytes + PAGE_SIZE > rsize) - break; - - __SetPageLocked(page); - rc = add_to_page_cache_locked(page, mapping, page->index, gfp); - if (rc) { - __ClearPageLocked(page); - break; - } - list_move_tail(&page->lru, tmplist); - (*bytes) += PAGE_SIZE; - expected_index++; - (*nr_pages)++; - } - return rc; -} - -static int cifs_readpages(struct file *file, struct address_space *mapping, - struct list_head *page_list, unsigned num_pages) -{ - int rc; - int err = 0; - struct list_head tmplist; - struct cifsFileInfo *open_file = file->private_data; - struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); + struct cifsFileInfo *open_file = ractl->file->private_data; + struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file); struct TCP_Server_Info *server; pid_t pid; - unsigned int xid; + unsigned int xid, nr_pages, last_batch_size = 0, cache_nr_pages = 0; + pgoff_t next_cached = ULONG_MAX; + bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) && + cifs_inode_cookie(ractl->mapping->host)->cache_priv; + bool check_cache = caching; xid = get_xid(); - /* - * Reads as many pages as possible from fscache. Returns -ENOBUFS - * immediately if the cookie is negative - * - * After this point, every page in the list might have PG_fscache set, - * so we will need to clean that up off of every page we don't use. 
- */ - rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list, - &num_pages); - if (rc == 0) { - free_xid(xid); - return rc; - } if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; @@ -4483,39 +4413,73 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses); cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n", - __func__, file, mapping, num_pages); + __func__, ractl->file, ractl->mapping, readahead_count(ractl)); /* - * Start with the page at end of list and move it to private - * list. Do the same with any following pages until we hit - * the rsize limit, hit an index discontinuity, or run out of - * pages. Issue the async read and then start the loop again - * until the list is empty. - * - * Note that list order is important. The page_list is in - * the order of declining indexes. When we put the pages in - * the rdata->pages, then we want them in increasing order. + * Chop the readahead request up into rsize-sized read requests. */ - while (!list_empty(page_list) && !err) { - unsigned int i, nr_pages, bytes, rsize; - loff_t offset; - struct page *page, *tpage; + while ((nr_pages = readahead_count(ractl) - last_batch_size)) { + unsigned int i, got, rsize; + struct page *page; struct cifs_readdata *rdata; struct cifs_credits credits_on_stack; struct cifs_credits *credits = &credits_on_stack; + pgoff_t index = readahead_index(ractl) + last_batch_size; + + /* + * Find out if we have anything cached in the range of + * interest, and if so, where the next chunk of cached data is. + */ + if (caching) { + if (check_cache) { + rc = cifs_fscache_query_occupancy( + ractl->mapping->host, index, nr_pages, + &next_cached, &cache_nr_pages); + if (rc < 0) + caching = false; + check_cache = false; + } + + if (index == next_cached) { + /* + * TODO: Send a whole batch of pages to be read + * by the cache. + */ + page = readahead_page(ractl); + last_batch_size = 1 << thp_order(page); + if (cifs_readpage_from_fscache(ractl->mapping->host, + page) < 0) { + /* + * TODO: Deal with cache read failure + * here, but for the moment, delegate + * that to readpage. + */ + caching = false; + } + unlock_page(page); + next_cached++; + cache_nr_pages--; + if (cache_nr_pages == 0) + check_cache = true; + continue; + } + } if (open_file->invalidHandle) { rc = cifs_reopen_file(open_file, true); - if (rc == -EAGAIN) - continue; - else if (rc) + if (rc) { + if (rc == -EAGAIN) + continue; break; + } } rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize, &rsize, credits); if (rc) break; + nr_pages = min_t(size_t, rsize / PAGE_SIZE, readahead_count(ractl)); + nr_pages = min_t(size_t, nr_pages, next_cached - index); /* * Give up immediately if rsize is too small to read an entire @@ -4523,16 +4487,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, * reach this point however since we set ra_pages to 0 when the * rsize is smaller than a cache page. 
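The cifs_readahead() loop above interleaves two data sources: runs of pages that fscache reports as cached are read back one page at a time, and the gaps in between are chopped into rsize-sized asynchronous SMB reads. A condensed sketch of the control flow, not compilable as-is, using the patch's own variable names:

	while (/* readahead window not exhausted */) {
		/* lazily locate the next cached run */
		if (caching && check_cache)
			cifs_fscache_query_occupancy(inode, index, nr_pages,
						     &next_cached,
						     &cache_nr_pages);

		/* pages inside the cached run come from the cache */
		if (index == next_cached) {
			cifs_readpage_from_fscache(inode, readahead_page(ractl));
			continue;	/* falls back to readpage on error */
		}

		/* otherwise read from the server: at most rsize bytes, and
		 * never past the start of the next cached run
		 */
		nr_pages = min(rsize / PAGE_SIZE, next_cached - index);
		/* ... __readahead_batch() into rdata, then async_readv ... */
	}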
*/ - if (unlikely(rsize < PAGE_SIZE)) { - add_credits_and_wake_if(server, credits, 0); - free_xid(xid); - return 0; - } - - nr_pages = 0; - err = readpages_get_pages(mapping, page_list, rsize, &tmplist, - &nr_pages, &offset, &bytes); - if (!nr_pages) { + if (unlikely(!nr_pages)) { add_credits_and_wake_if(server, credits, 0); break; } @@ -4540,36 +4495,31 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete); if (!rdata) { /* best to give up if we're out of mem */ - list_for_each_entry_safe(page, tpage, &tmplist, lru) { - list_del(&page->lru); - lru_cache_add(page); - unlock_page(page); - put_page(page); - } - rc = -ENOMEM; add_credits_and_wake_if(server, credits, 0); break; } - rdata->cfile = cifsFileInfo_get(open_file); - rdata->server = server; - rdata->mapping = mapping; - rdata->offset = offset; - rdata->bytes = bytes; - rdata->pid = pid; - rdata->pagesz = PAGE_SIZE; - rdata->tailsz = PAGE_SIZE; + got = __readahead_batch(ractl, rdata->pages, nr_pages); + if (got != nr_pages) { + pr_warn("__readahead_batch() returned %u/%u\n", + got, nr_pages); + nr_pages = got; + } + + rdata->nr_pages = nr_pages; + rdata->bytes = readahead_batch_length(ractl); + rdata->cfile = cifsFileInfo_get(open_file); + rdata->server = server; + rdata->mapping = ractl->mapping; + rdata->offset = readahead_pos(ractl); + rdata->pid = pid; + rdata->pagesz = PAGE_SIZE; + rdata->tailsz = PAGE_SIZE; rdata->read_into_pages = cifs_readpages_read_into_pages; rdata->copy_into_pages = cifs_readpages_copy_into_pages; - rdata->credits = credits_on_stack; - - list_for_each_entry_safe(page, tpage, &tmplist, lru) { - list_del(&page->lru); - rdata->pages[rdata->nr_pages++] = page; - } + rdata->credits = credits_on_stack; rc = adjust_credits(server, &rdata->credits, rdata->bytes); - if (!rc) { if (rdata->cfile->invalidHandle) rc = -EAGAIN; @@ -4581,7 +4531,6 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, add_credits_and_wake_if(server, &rdata->credits, 0); for (i = 0; i < rdata->nr_pages; i++) { page = rdata->pages[i]; - lru_cache_add(page); unlock_page(page); put_page(page); } @@ -4591,15 +4540,10 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, } kref_put(&rdata->refcount, cifs_readdata_release); + last_batch_size = nr_pages; } - /* Any pages that have been shown to fscache but didn't get added to - * the pagecache must be uncached before they get returned to the - * allocator. 
- */ - cifs_fscache_readpages_cancel(mapping->host, page_list); free_xid(xid); - return rc; } /* @@ -4801,17 +4745,19 @@ static int cifs_release_page(struct page *page, gfp_t gfp) { if (PagePrivate(page)) return 0; - - return cifs_fscache_release_page(page, gfp); + if (PageFsCache(page)) { + if (current_is_kswapd() || !(gfp & __GFP_FS)) + return false; + wait_on_page_fscache(page); + } + fscache_note_page_release(cifs_inode_cookie(page->mapping->host)); + return true; } static void cifs_invalidate_page(struct page *page, unsigned int offset, unsigned int length) { - struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host); - - if (offset == 0 && length == PAGE_SIZE) - cifs_fscache_invalidate_page(page, &cifsi->vfs_inode); + wait_on_page_fscache(page); } static int cifs_launder_page(struct page *page) @@ -4831,7 +4777,7 @@ static int cifs_launder_page(struct page *page) if (clear_page_dirty_for_io(page)) rc = cifs_writepage_locked(page, &wbc); - cifs_fscache_invalidate_page(page, page->mapping->host); + wait_on_page_fscache(page); return rc; } @@ -4921,7 +4867,7 @@ oplock_break_done: * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests * so this method should never be called. * - * Direct IO is not yet supported in the cached mode. + * Direct IO is not yet supported in the cached mode. */ static ssize_t cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter) @@ -4988,14 +4934,27 @@ static void cifs_swap_deactivate(struct file *file) /* do we need to unpin (or unlock) the file */ } +/* + * Mark a page as having been made dirty and thus needing writeback. We also + * need to pin the cache object to write back to. + */ +#ifdef CONFIG_CIFS_FSCACHE +static int cifs_set_page_dirty(struct page *page) +{ + return fscache_set_page_dirty(page, cifs_inode_cookie(page->mapping->host)); +} +#else +#define cifs_set_page_dirty __set_page_dirty_nobuffers +#endif + const struct address_space_operations cifs_addr_ops = { .readpage = cifs_readpage, - .readpages = cifs_readpages, + .readahead = cifs_readahead, .writepage = cifs_writepage, .writepages = cifs_writepages, .write_begin = cifs_write_begin, .write_end = cifs_write_end, - .set_page_dirty = __set_page_dirty_nobuffers, + .set_page_dirty = cifs_set_page_dirty, .releasepage = cifs_release_page, .direct_IO = cifs_direct_io, .invalidatepage = cifs_invalidate_page, @@ -5020,7 +4979,7 @@ const struct address_space_operations cifs_addr_ops_smallbuf = { .writepages = cifs_writepages, .write_begin = cifs_write_begin, .write_end = cifs_write_end, - .set_page_dirty = __set_page_dirty_nobuffers, + .set_page_dirty = cifs_set_page_dirty, .releasepage = cifs_release_page, .invalidatepage = cifs_invalidate_page, .launder_page = cifs_launder_page, diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index e3ed25dc6f3f6c4225273b5fe8839d9be3545433..7ec35f3f0a5fcb20d874e616b2bbf75dc8322d56 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -37,6 +37,8 @@ #include "rfc1002pdu.h" #include "fs_context.h" +static DEFINE_MUTEX(cifs_mount_mutex); + static const match_table_t cifs_smb_version_tokens = { { Smb_1, SMB1_VERSION_STRING }, { Smb_20, SMB20_VERSION_STRING}, @@ -707,10 +709,14 @@ static int smb3_get_tree_common(struct fs_context *fc) static int smb3_get_tree(struct fs_context *fc) { int err = smb3_fs_context_validate(fc); + int ret; if (err) return err; - return smb3_get_tree_common(fc); + mutex_lock(&cifs_mount_mutex); + ret = smb3_get_tree_common(fc); + mutex_unlock(&cifs_mount_mutex); + return ret; } static 
void smb3_fs_context_free(struct fs_context *fc) diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index 003c5f1f4dfb1a058afee9a917818f6c43891d75..33af72e0ac0c52d7d3f9f8d23275f9bba2574078 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -12,332 +12,249 @@ #include "cifs_fs_sb.h" #include "cifsproto.h" -/* - * Key layout of CIFS server cache index object - */ -struct cifs_server_key { - __u64 conn_id; -} __packed; - -/* - * Get a cookie for a server object keyed by {IPaddress,port,family} tuple - */ -void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) -{ - struct cifs_server_key key; - - /* - * Check if cookie was already initialized so don't reinitialize it. - * In the future, as we integrate with newer fscache features, - * we may want to instead add a check if cookie has changed - */ - if (server->fscache) - return; - - memset(&key, 0, sizeof(key)); - key.conn_id = server->conn_id; - - server->fscache = - fscache_acquire_cookie(cifs_fscache_netfs.primary_index, - &cifs_fscache_server_index_def, - &key, sizeof(key), - NULL, 0, - server, 0, true); - cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", - __func__, server, server->fscache); -} - -void cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) +static void cifs_fscache_fill_volume_coherency( + struct cifs_tcon *tcon, + struct cifs_fscache_volume_coherency_data *cd) { - cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", - __func__, server, server->fscache); - fscache_relinquish_cookie(server->fscache, NULL, false); - server->fscache = NULL; + memset(cd, 0, sizeof(*cd)); + cd->resource_id = cpu_to_le64(tcon->resource_id); + cd->vol_create_time = tcon->vol_create_time; + cd->vol_serial_number = cpu_to_le32(tcon->vol_serial_number); } -void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) +int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) { + struct cifs_fscache_volume_coherency_data cd; struct TCP_Server_Info *server = tcon->ses->server; + struct fscache_volume *vcookie; + const struct sockaddr *sa = (struct sockaddr *)&server->dstaddr; + size_t slen, i; char *sharename; - struct cifs_fscache_super_auxdata auxdata; + char *key; + int ret = -ENOMEM; + + tcon->fscache = NULL; + switch (sa->sa_family) { + case AF_INET: + case AF_INET6: + break; + default: + cifs_dbg(VFS, "Unknown network family '%d'\n", sa->sa_family); + return -EINVAL; + } - /* - * Check if cookie was already initialized so don't reinitialize it. 
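A cache volume needs a key that is unique per share and stable across mounts, so (as the code just below builds it) the key couples the server's socket address with the sanitized share name. Purely illustrative, with made-up values, and assuming extract_sharename() yields the share component of the treeName:

	/*
	 * For a share mounted from //192.0.2.1/data over port 445:
	 *
	 *   sharename = "data"   (any '/' is mapped to ';', since '/'
	 *                          is meaningful in cachefiles paths)
	 *   key       = "cifs,192.0.2.1:445,data"
	 */
	key = kasprintf(GFP_KERNEL, "cifs,%pISpc,%s", sa, sharename);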
- * In the future, as we integrate with newer fscache features, - * we may want to instead add a check if cookie has changed - */ - if (tcon->fscache) - return; + memset(&key, 0, sizeof(key)); sharename = extract_sharename(tcon->treeName); if (IS_ERR(sharename)) { cifs_dbg(FYI, "%s: couldn't extract sharename\n", __func__); - tcon->fscache = NULL; - return; + return -EINVAL; + } + + slen = strlen(sharename); + for (i = 0; i < slen; i++) + if (sharename[i] == '/') + sharename[i] = ';'; + + key = kasprintf(GFP_KERNEL, "cifs,%pISpc,%s", sa, sharename); + if (!key) + goto out; + + cifs_fscache_fill_volume_coherency(tcon, &cd); + vcookie = fscache_acquire_volume(key, + NULL, /* preferred_cache */ + &cd, sizeof(cd)); + cifs_dbg(FYI, "%s: (%s/0x%p)\n", __func__, key, vcookie); + if (IS_ERR(vcookie)) { + if (vcookie != ERR_PTR(-EBUSY)) { + ret = PTR_ERR(vcookie); + goto out_2; + } + pr_err("Cache volume key already in use (%s)\n", key); + vcookie = NULL; } - memset(&auxdata, 0, sizeof(auxdata)); - auxdata.resource_id = tcon->resource_id; - auxdata.vol_create_time = tcon->vol_create_time; - auxdata.vol_serial_number = tcon->vol_serial_number; - - tcon->fscache = - fscache_acquire_cookie(server->fscache, - &cifs_fscache_super_index_def, - sharename, strlen(sharename), - &auxdata, sizeof(auxdata), - tcon, 0, true); + tcon->fscache = vcookie; + ret = 0; +out_2: + kfree(key); +out: kfree(sharename); - cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", - __func__, server->fscache, tcon->fscache); + return ret; } void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) { - struct cifs_fscache_super_auxdata auxdata; - - memset(&auxdata, 0, sizeof(auxdata)); - auxdata.resource_id = tcon->resource_id; - auxdata.vol_create_time = tcon->vol_create_time; - auxdata.vol_serial_number = tcon->vol_serial_number; + struct cifs_fscache_volume_coherency_data cd; cifs_dbg(FYI, "%s: (0x%p)\n", __func__, tcon->fscache); - fscache_relinquish_cookie(tcon->fscache, &auxdata, false); - tcon->fscache = NULL; -} - -static void cifs_fscache_acquire_inode_cookie(struct cifsInodeInfo *cifsi, - struct cifs_tcon *tcon) -{ - struct cifs_fscache_inode_auxdata auxdata; - memset(&auxdata, 0, sizeof(auxdata)); - auxdata.eof = cifsi->server_eof; - auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec; - auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec; - auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec; - auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec; - - cifsi->fscache = - fscache_acquire_cookie(tcon->fscache, - &cifs_fscache_inode_object_def, - &cifsi->uniqueid, sizeof(cifsi->uniqueid), - &auxdata, sizeof(auxdata), - cifsi, cifsi->vfs_inode.i_size, true); + cifs_fscache_fill_volume_coherency(tcon, &cd); + fscache_relinquish_volume(tcon->fscache, &cd, false); + tcon->fscache = NULL; } -static void cifs_fscache_enable_inode_cookie(struct inode *inode) +void cifs_fscache_get_inode_cookie(struct inode *inode) { + struct cifs_fscache_inode_coherency_data cd; struct cifsInodeInfo *cifsi = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); - if (cifsi->fscache) - return; - - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE)) - return; - - cifs_fscache_acquire_inode_cookie(cifsi, tcon); + cifs_fscache_fill_coherency(&cifsi->vfs_inode, &cd); - cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n", - __func__, tcon->fscache, cifsi->fscache); + cifsi->fscache = + fscache_acquire_cookie(tcon->fscache, 0, + &cifsi->uniqueid, 
sizeof(cifsi->uniqueid), + &cd, sizeof(cd), + i_size_read(&cifsi->vfs_inode)); } -void cifs_fscache_release_inode_cookie(struct inode *inode) +void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) { - struct cifs_fscache_inode_auxdata auxdata; - struct cifsInodeInfo *cifsi = CIFS_I(inode); - - if (cifsi->fscache) { - memset(&auxdata, 0, sizeof(auxdata)); - auxdata.eof = cifsi->server_eof; - auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec; - auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec; - auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec; - auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec; - - cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache); - /* fscache_relinquish_cookie does not seem to update auxdata */ - fscache_update_cookie(cifsi->fscache, &auxdata); - fscache_relinquish_cookie(cifsi->fscache, &auxdata, false); - cifsi->fscache = NULL; + if (update) { + struct cifs_fscache_inode_coherency_data cd; + loff_t i_size = i_size_read(inode); + + cifs_fscache_fill_coherency(inode, &cd); + fscache_unuse_cookie(cifs_inode_cookie(inode), &cd, &i_size); + } else { + fscache_unuse_cookie(cifs_inode_cookie(inode), NULL, NULL); } } -void cifs_fscache_update_inode_cookie(struct inode *inode) +void cifs_fscache_release_inode_cookie(struct inode *inode) { - struct cifs_fscache_inode_auxdata auxdata; struct cifsInodeInfo *cifsi = CIFS_I(inode); if (cifsi->fscache) { - memset(&auxdata, 0, sizeof(auxdata)); - auxdata.eof = cifsi->server_eof; - auxdata.last_write_time_sec = cifsi->vfs_inode.i_mtime.tv_sec; - auxdata.last_change_time_sec = cifsi->vfs_inode.i_ctime.tv_sec; - auxdata.last_write_time_nsec = cifsi->vfs_inode.i_mtime.tv_nsec; - auxdata.last_change_time_nsec = cifsi->vfs_inode.i_ctime.tv_nsec; - cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cifsi->fscache); - fscache_update_cookie(cifsi->fscache, &auxdata); - } -} - -void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp) -{ - cifs_fscache_enable_inode_cookie(inode); -} - -void cifs_fscache_reset_inode_cookie(struct inode *inode) -{ - struct cifsInodeInfo *cifsi = CIFS_I(inode); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); - struct fscache_cookie *old = cifsi->fscache; - - if (cifsi->fscache) { - /* retire the current fscache cache and get a new one */ - fscache_relinquish_cookie(cifsi->fscache, NULL, true); - - cifs_fscache_acquire_inode_cookie(cifsi, tcon); - cifs_dbg(FYI, "%s: new cookie 0x%p oldcookie 0x%p\n", - __func__, cifsi->fscache, old); + fscache_relinquish_cookie(cifsi->fscache, false); + cifsi->fscache = NULL; } } -int cifs_fscache_release_page(struct page *page, gfp_t gfp) +static inline void fscache_end_operation(struct netfs_cache_resources *cres) { - if (PageFsCache(page)) { - struct inode *inode = page->mapping->host; - struct cifsInodeInfo *cifsi = CIFS_I(inode); - - cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", - __func__, page, cifsi->fscache); - if (!fscache_maybe_release_page(cifsi->fscache, page, gfp)) - return 0; - } + const struct netfs_cache_ops *ops = fscache_operation_valid(cres); - return 1; -} - -static void cifs_readpage_from_fscache_complete(struct page *page, void *ctx, - int error) -{ - cifs_dbg(FYI, "%s: (0x%p/%d)\n", __func__, page, error); - if (!error) - SetPageUptodate(page); - unlock_page(page); + if (ops) + ops->end_operation(cres); } /* - * Retrieve a page from FS-Cache + * Fallback page reading interface. 
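Both fallback helpers below drive the cache through the netfs cache ops using a one-page bio_vec iterator. The essential setup, lifted from the read side (the write side is identical apart from WRITE and the begin/prepare calls):

	struct bio_vec bvec[1];
	struct iov_iter iter;

	bvec[0].bv_page   = page;
	bvec[0].bv_offset = 0;
	bvec[0].bv_len    = PAGE_SIZE;
	iov_iter_bvec(&iter, READ, bvec, ARRAY_SIZE(bvec), PAGE_SIZE);
	/* then fscache_begin_read_operation() + fscache_read() */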
*/ -int __cifs_readpage_from_fscache(struct inode *inode, struct page *page) +static int fscache_fallback_read_page(struct inode *inode, struct page *page) { + struct netfs_cache_resources cres; + struct fscache_cookie *cookie = cifs_inode_cookie(inode); + struct iov_iter iter; + struct bio_vec bvec[1]; int ret; - cifs_dbg(FYI, "%s: (fsc:%p, p:%p, i:0x%p\n", - __func__, CIFS_I(inode)->fscache, page, inode); - ret = fscache_read_or_alloc_page(CIFS_I(inode)->fscache, page, - cifs_readpage_from_fscache_complete, - NULL, - GFP_KERNEL); - switch (ret) { - - case 0: /* page found in fscache, read submitted */ - cifs_dbg(FYI, "%s: submitted\n", __func__); + memset(&cres, 0, sizeof(cres)); + bvec[0].bv_page = page; + bvec[0].bv_offset = 0; + bvec[0].bv_len = PAGE_SIZE; + iov_iter_bvec(&iter, READ, bvec, ARRAY_SIZE(bvec), PAGE_SIZE); + + ret = fscache_begin_read_operation(&cres, cookie); + if (ret < 0) return ret; - case -ENOBUFS: /* page won't be cached */ - case -ENODATA: /* page not in cache */ - cifs_dbg(FYI, "%s: %d\n", __func__, ret); - return 1; - default: - cifs_dbg(VFS, "unknown error ret = %d\n", ret); - } + ret = fscache_read(&cres, page_offset(page), &iter, NETFS_READ_HOLE_FAIL, + NULL, NULL); + fscache_end_operation(&cres); return ret; } /* - * Retrieve a set of pages from FS-Cache + * Fallback page writing interface. */ -int __cifs_readpages_from_fscache(struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages) +static int fscache_fallback_write_page(struct inode *inode, struct page *page, + bool no_space_allocated_yet) { + struct netfs_cache_resources cres; + struct fscache_cookie *cookie = cifs_inode_cookie(inode); + struct iov_iter iter; + struct bio_vec bvec[1]; + loff_t start = page_offset(page); + size_t len = PAGE_SIZE; int ret; - cifs_dbg(FYI, "%s: (0x%p/%u/0x%p)\n", - __func__, CIFS_I(inode)->fscache, *nr_pages, inode); - ret = fscache_read_or_alloc_pages(CIFS_I(inode)->fscache, mapping, - pages, nr_pages, - cifs_readpage_from_fscache_complete, - NULL, - mapping_gfp_mask(mapping)); - switch (ret) { - case 0: /* read submitted to the cache for all pages */ - cifs_dbg(FYI, "%s: submitted\n", __func__); - return ret; - - case -ENOBUFS: /* some pages are not cached and can't be */ - case -ENODATA: /* some pages are not cached */ - cifs_dbg(FYI, "%s: no page\n", __func__); - return 1; + memset(&cres, 0, sizeof(cres)); + bvec[0].bv_page = page; + bvec[0].bv_offset = 0; + bvec[0].bv_len = PAGE_SIZE; + iov_iter_bvec(&iter, WRITE, bvec, ARRAY_SIZE(bvec), PAGE_SIZE); - default: - cifs_dbg(FYI, "unknown error ret = %d\n", ret); - } + ret = fscache_begin_write_operation(&cres, cookie); + if (ret < 0) + return ret; + ret = cres.ops->prepare_write(&cres, &start, &len, i_size_read(inode), + no_space_allocated_yet); + if (ret == 0) + ret = fscache_write(&cres, page_offset(page), &iter, NULL, NULL); + fscache_end_operation(&cres); return ret; } -void __cifs_readpage_to_fscache(struct inode *inode, struct page *page) +/* + * Retrieve a page from FS-Cache + */ +int __cifs_readpage_from_fscache(struct inode *inode, struct page *page) { - struct cifsInodeInfo *cifsi = CIFS_I(inode); int ret; - WARN_ON(!cifsi->fscache); + cifs_dbg(FYI, "%s: (fsc:%p, p:%p, i:0x%p\n", + __func__, cifs_inode_cookie(inode), page, inode); - cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n", - __func__, cifsi->fscache, page, inode); - ret = fscache_write_page(cifsi->fscache, page, - cifsi->vfs_inode.i_size, GFP_KERNEL); - if (ret != 0) - fscache_uncache_page(cifsi->fscache, 
page); -} + ret = fscache_fallback_read_page(inode, page); + if (ret < 0) + return ret; -void __cifs_fscache_readpages_cancel(struct inode *inode, struct list_head *pages) -{ - cifs_dbg(FYI, "%s: (fsc: %p, i: %p)\n", - __func__, CIFS_I(inode)->fscache, inode); - fscache_readpages_cancel(CIFS_I(inode)->fscache, pages); + /* Read completed synchronously */ + SetPageUptodate(page); + return 0; } -void __cifs_fscache_invalidate_page(struct page *page, struct inode *inode) +void __cifs_readpage_to_fscache(struct inode *inode, struct page *page) { - struct cifsInodeInfo *cifsi = CIFS_I(inode); - struct fscache_cookie *cookie = cifsi->fscache; + cifs_dbg(FYI, "%s: (fsc: %p, p: %p, i: %p)\n", + __func__, cifs_inode_cookie(inode), page, inode); - cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie); - fscache_wait_on_page_write(cookie, page); - fscache_uncache_page(cookie, page); + fscache_fallback_write_page(inode, page, true); } -void __cifs_fscache_wait_on_page_write(struct inode *inode, struct page *page) +/* + * Query the cache occupancy. + */ +int __cifs_fscache_query_occupancy(struct inode *inode, + pgoff_t first, unsigned int nr_pages, + pgoff_t *_data_first, + unsigned int *_data_nr_pages) { - struct cifsInodeInfo *cifsi = CIFS_I(inode); - struct fscache_cookie *cookie = cifsi->fscache; + struct netfs_cache_resources cres; + struct fscache_cookie *cookie = cifs_inode_cookie(inode); + loff_t start, data_start; + size_t len, data_len; + int ret; - cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie); - fscache_wait_on_page_write(cookie, page); -} + ret = fscache_begin_read_operation(&cres, cookie); + if (ret < 0) + return ret; -void __cifs_fscache_uncache_page(struct inode *inode, struct page *page) -{ - struct cifsInodeInfo *cifsi = CIFS_I(inode); - struct fscache_cookie *cookie = cifsi->fscache; + start = first * PAGE_SIZE; + len = nr_pages * PAGE_SIZE; + ret = cres.ops->query_occupancy(&cres, start, len, PAGE_SIZE, + &data_start, &data_len); + if (ret == 0) { + *_data_first = data_start / PAGE_SIZE; + *_data_nr_pages = data_len / PAGE_SIZE; + } - cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, page, cookie); - fscache_uncache_page(cookie, page); + fscache_end_operation(&cres); + return ret; } diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h index 9baa1d0f22bde504bbc5debf15ad4296b7972cdd..55129908e2c131e2080a44978d9eb45e734edf2d 100644 --- a/fs/cifs/fscache.h +++ b/fs/cifs/fscache.h @@ -9,173 +9,154 @@ #ifndef _CIFS_FSCACHE_H #define _CIFS_FSCACHE_H +#include <linux/netfs.h> #include <linux/fscache.h> #include "cifsglob.h" -#ifdef CONFIG_CIFS_FSCACHE - /* - * Auxiliary data attached to CIFS superblock within the cache + * Coherency data attached to CIFS volume within the cache */ -struct cifs_fscache_super_auxdata { - u64 resource_id; /* unique server resource id */ +struct cifs_fscache_volume_coherency_data { + __le64 resource_id; /* unique server resource id */ __le64 vol_create_time; - u32 vol_serial_number; + __le32 vol_serial_number; } __packed; /* - * Auxiliary data attached to CIFS inode within the cache + * Coherency data attached to CIFS inode within the cache. 
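Note the type change here: the old auxdata used host-endian u32/u64 fields, while the new coherency data is all __le types. The blob is persisted by the cache backend and may be read back on a machine of the opposite endianness, so the fill helper (cifs_fscache_fill_coherency(), defined later in this header) converts every field explicitly, roughly:

	cd->last_write_time_sec   = cpu_to_le64(inode->i_mtime.tv_sec);
	cd->last_write_time_nsec  = cpu_to_le32(inode->i_mtime.tv_nsec);
	cd->last_change_time_sec  = cpu_to_le64(inode->i_ctime.tv_sec);
	cd->last_change_time_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);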
*/ -struct cifs_fscache_inode_auxdata { - u64 last_write_time_sec; - u64 last_change_time_sec; - u32 last_write_time_nsec; - u32 last_change_time_nsec; - u64 eof; +struct cifs_fscache_inode_coherency_data { + __le64 last_write_time_sec; + __le64 last_change_time_sec; + __le32 last_write_time_nsec; + __le32 last_change_time_nsec; }; -/* - * cache.c - */ -extern struct fscache_netfs cifs_fscache_netfs; -extern const struct fscache_cookie_def cifs_fscache_server_index_def; -extern const struct fscache_cookie_def cifs_fscache_super_index_def; -extern const struct fscache_cookie_def cifs_fscache_inode_object_def; - -extern int cifs_fscache_register(void); -extern void cifs_fscache_unregister(void); +#ifdef CONFIG_CIFS_FSCACHE /* * fscache.c */ -extern void cifs_fscache_get_client_cookie(struct TCP_Server_Info *); -extern void cifs_fscache_release_client_cookie(struct TCP_Server_Info *); -extern void cifs_fscache_get_super_cookie(struct cifs_tcon *); +extern int cifs_fscache_get_super_cookie(struct cifs_tcon *); extern void cifs_fscache_release_super_cookie(struct cifs_tcon *); +extern void cifs_fscache_get_inode_cookie(struct inode *inode); extern void cifs_fscache_release_inode_cookie(struct inode *); -extern void cifs_fscache_update_inode_cookie(struct inode *inode); -extern void cifs_fscache_set_inode_cookie(struct inode *, struct file *); -extern void cifs_fscache_reset_inode_cookie(struct inode *); - -extern void __cifs_fscache_invalidate_page(struct page *, struct inode *); -extern void __cifs_fscache_wait_on_page_write(struct inode *inode, struct page *page); -extern void __cifs_fscache_uncache_page(struct inode *inode, struct page *page); -extern int cifs_fscache_release_page(struct page *page, gfp_t gfp); -extern int __cifs_readpage_from_fscache(struct inode *, struct page *); -extern int __cifs_readpages_from_fscache(struct inode *, - struct address_space *, - struct list_head *, - unsigned *); -extern void __cifs_fscache_readpages_cancel(struct inode *, struct list_head *); - -extern void __cifs_readpage_to_fscache(struct inode *, struct page *); - -static inline void cifs_fscache_invalidate_page(struct page *page, - struct inode *inode) +extern void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update); + +static inline +void cifs_fscache_fill_coherency(struct inode *inode, + struct cifs_fscache_inode_coherency_data *cd) { - if (PageFsCache(page)) - __cifs_fscache_invalidate_page(page, inode); + struct cifsInodeInfo *cifsi = CIFS_I(inode); + + memset(cd, 0, sizeof(*cd)); + cd->last_write_time_sec = cpu_to_le64(cifsi->vfs_inode.i_mtime.tv_sec); + cd->last_write_time_nsec = cpu_to_le32(cifsi->vfs_inode.i_mtime.tv_nsec); + cd->last_change_time_sec = cpu_to_le64(cifsi->vfs_inode.i_ctime.tv_sec); + cd->last_change_time_nsec = cpu_to_le32(cifsi->vfs_inode.i_ctime.tv_nsec); } -static inline void cifs_fscache_wait_on_page_write(struct inode *inode, - struct page *page) + +static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { - if (PageFsCache(page)) - __cifs_fscache_wait_on_page_write(inode, page); + return CIFS_I(inode)->fscache; } -static inline void cifs_fscache_uncache_page(struct inode *inode, - struct page *page) +static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) { - if (PageFsCache(page)) - __cifs_fscache_uncache_page(inode, page); + struct cifs_fscache_inode_coherency_data cd; + + cifs_fscache_fill_coherency(inode, &cd); + fscache_invalidate(cifs_inode_cookie(inode), &cd, + i_size_read(inode), flags); } -static 
inline int cifs_readpage_from_fscache(struct inode *inode, - struct page *page) -{ - if (CIFS_I(inode)->fscache) - return __cifs_readpage_from_fscache(inode, page); +extern int __cifs_fscache_query_occupancy(struct inode *inode, + pgoff_t first, unsigned int nr_pages, + pgoff_t *_data_first, + unsigned int *_data_nr_pages); - return -ENOBUFS; +static inline int cifs_fscache_query_occupancy(struct inode *inode, + pgoff_t first, unsigned int nr_pages, + pgoff_t *_data_first, + unsigned int *_data_nr_pages) +{ + if (!cifs_inode_cookie(inode)) + return -ENOBUFS; + return __cifs_fscache_query_occupancy(inode, first, nr_pages, + _data_first, _data_nr_pages); } -static inline int cifs_readpages_from_fscache(struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages) +extern int __cifs_readpage_from_fscache(struct inode *pinode, struct page *ppage); +extern void __cifs_readpage_to_fscache(struct inode *pinode, struct page *ppage); + + +static inline int cifs_readpage_from_fscache(struct inode *inode, + struct page *page) { - if (CIFS_I(inode)->fscache) - return __cifs_readpages_from_fscache(inode, mapping, pages, - nr_pages); + if (cifs_inode_cookie(inode)) + return __cifs_readpage_from_fscache(inode, page); return -ENOBUFS; } static inline void cifs_readpage_to_fscache(struct inode *inode, struct page *page) { - if (PageFsCache(page)) + if (cifs_inode_cookie(inode)) __cifs_readpage_to_fscache(inode, page); } -static inline void cifs_fscache_readpages_cancel(struct inode *inode, - struct list_head *pages) +static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp) { - if (CIFS_I(inode)->fscache) - return __cifs_fscache_readpages_cancel(inode, pages); + if (PageFsCache(page)) { + if (current_is_kswapd() || !(gfp & __GFP_FS)) + return false; + wait_on_page_fscache(page); + fscache_note_page_release(cifs_inode_cookie(page->mapping->host)); + } + return true; } #else /* CONFIG_CIFS_FSCACHE */ -static inline int cifs_fscache_register(void) { return 0; } -static inline void cifs_fscache_unregister(void) {} - -static inline void -cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) {} -static inline void -cifs_fscache_release_client_cookie(struct TCP_Server_Info *server) {} -static inline void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) {} -static inline void -cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) {} - -static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {} -static inline void cifs_fscache_update_inode_cookie(struct inode *inode) {} -static inline void cifs_fscache_set_inode_cookie(struct inode *inode, - struct file *filp) {} -static inline void cifs_fscache_reset_inode_cookie(struct inode *inode) {} -static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp) +static inline +void cifs_fscache_fill_coherency(struct inode *inode, + struct cifs_fscache_inode_coherency_data *cd) { - return 1; /* May release page */ } -static inline void cifs_fscache_invalidate_page(struct page *page, - struct inode *inode) {} -static inline void cifs_fscache_wait_on_page_write(struct inode *inode, - struct page *page) {} -static inline void cifs_fscache_uncache_page(struct inode *inode, - struct page *page) {} +static inline int cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) { return 0; } +static inline void cifs_fscache_release_super_cookie(struct cifs_tcon *tcon) {} -static inline int -cifs_readpage_from_fscache(struct inode *inode, struct page *page) +static inline void 
cifs_fscache_get_inode_cookie(struct inode *inode) {} +static inline void cifs_fscache_release_inode_cookie(struct inode *inode) {} +static inline void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update) {} +static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode) { return NULL; } +static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags) {} + +static inline int cifs_fscache_query_occupancy(struct inode *inode, + pgoff_t first, unsigned int nr_pages, + pgoff_t *_data_first, + unsigned int *_data_nr_pages) { + *_data_first = ULONG_MAX; + *_data_nr_pages = 0; return -ENOBUFS; } -static inline int cifs_readpages_from_fscache(struct inode *inode, - struct address_space *mapping, - struct list_head *pages, - unsigned *nr_pages) +static inline int +cifs_readpage_from_fscache(struct inode *inode, struct page *page) { return -ENOBUFS; } -static inline void cifs_readpage_to_fscache(struct inode *inode, - struct page *page) {} +static inline +void cifs_readpage_to_fscache(struct inode *inode, struct page *page) {} -static inline void cifs_fscache_readpages_cancel(struct inode *inode, - struct list_head *pages) +static inline int cifs_fscache_release_page(struct page *page, gfp_t gfp) { + return true; /* May release page */ } #endif /* CONFIG_CIFS_FSCACHE */ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 279622e4eb1c290b41d017bda10cb5cda93b5575..60d853c92f6aaedf6248db56f79350f7eb6947f4 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -83,6 +83,7 @@ static void cifs_set_ops(struct inode *inode) static void cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) { + struct cifs_fscache_inode_coherency_data cd; struct cifsInodeInfo *cifs_i = CIFS_I(inode); cifs_dbg(FYI, "%s: revalidating inode %llu\n", @@ -113,6 +114,9 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr) cifs_dbg(FYI, "%s: invalidating inode %llu mapping\n", __func__, cifs_i->uniqueid); set_bit(CIFS_INO_INVALID_MAPPING, &cifs_i->flags); + /* Invalidate fscache cookie */ + cifs_fscache_fill_coherency(&cifs_i->vfs_inode, &cd); + fscache_invalidate(cifs_inode_cookie(inode), &cd, i_size_read(inode), 0); } /* @@ -952,6 +956,12 @@ cifs_get_inode_info(struct inode **inode, rc = server->ops->query_path_info(xid, tcon, cifs_sb, full_path, tmp_data, &adjust_tz, &is_reparse_point); +#ifdef CONFIG_CIFS_DFS_UPCALL + if (rc == -ENOENT && is_tcon_dfs(tcon)) + rc = cifs_dfs_query_info_nonascii_quirk(xid, tcon, + cifs_sb, + full_path); +#endif data = tmp_data; } @@ -1298,10 +1308,7 @@ retry_iget5_locked: inode->i_flags |= S_NOATIME | S_NOCMTIME; if (inode->i_state & I_NEW) { inode->i_ino = hash; -#ifdef CONFIG_CIFS_FSCACHE - /* initialize per-inode cache cookie pointer */ - CIFS_I(inode)->fscache = NULL; -#endif + cifs_fscache_get_inode_cookie(inode); unlock_new_inode(inode); } } @@ -1370,6 +1377,7 @@ iget_no_retry: iget_failed(inode); inode = ERR_PTR(rc); } + out: kfree(path); free_xid(xid); @@ -2266,7 +2274,6 @@ cifs_invalidate_mapping(struct inode *inode) __func__, inode); } - cifs_fscache_reset_inode_cookie(inode); return rc; } @@ -2771,8 +2778,10 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) goto out; if ((attrs->ia_valid & ATTR_SIZE) && - attrs->ia_size != i_size_read(inode)) + attrs->ia_size != i_size_read(inode)) { truncate_setsize(inode, attrs->ia_size); + fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size); + } setattr_copy(&init_user_ns, inode, attrs); mark_inode_dirty(inode); @@ -2967,8 +2976,10 @@ 
@@ -2967,8 +2976,10 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
                 goto cifs_setattr_exit;
 
         if ((attrs->ia_valid & ATTR_SIZE) &&
-            attrs->ia_size != i_size_read(inode))
+            attrs->ia_size != i_size_read(inode)) {
                 truncate_setsize(inode, attrs->ia_size);
+                fscache_resize_cookie(cifs_inode_cookie(inode), attrs->ia_size);
+        }
 
         setattr_copy(&init_user_ns, inode, attrs);
         mark_inode_dirty(inode);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 5148d48d6a357e0f231ce1e278d0c67ec42bacc9..56598f7dbe00d8ba3001c52f31141096c85d226b 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1302,4 +1302,53 @@ int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix)
         cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
         return 0;
 }
+
+/** cifs_dfs_query_info_nonascii_quirk
+ * Handle weird Windows SMB server behaviour. It responds with
+ * STATUS_OBJECT_NAME_INVALID code to SMB2 QUERY_INFO request
+ * for "\<server>\<dfsname>\<linkpath>" DFS reference,
+ * where <dfsname> or <linkpath> contains non-ASCII unicode symbols.
+ *
+ * Check for such a DFS reference and emulate -EREMOTE if it is
+ * found, or -ENOENT if it is not.
+ */
+int cifs_dfs_query_info_nonascii_quirk(const unsigned int xid,
+                                       struct cifs_tcon *tcon,
+                                       struct cifs_sb_info *cifs_sb,
+                                       const char *linkpath)
+{
+        char *treename, *dfspath, sep;
+        int treenamelen, linkpathlen, rc;
+
+        treename = tcon->treeName;
+        /* MS-DFSC: All paths in REQ_GET_DFS_REFERRAL and RESP_GET_DFS_REFERRAL
+         * messages MUST be encoded with exactly one leading backslash, not two
+         * leading backslashes.
+         */
+        sep = CIFS_DIR_SEP(cifs_sb);
+        if (treename[0] == sep && treename[1] == sep)
+                treename++;
+        linkpathlen = strlen(linkpath);
+        treenamelen = strnlen(treename, MAX_TREE_SIZE + 1);
+        dfspath = kzalloc(treenamelen + linkpathlen + 1, GFP_KERNEL);
+        if (!dfspath)
+                return -ENOMEM;
+        if (treenamelen)
+                memcpy(dfspath, treename, treenamelen);
+        memcpy(dfspath + treenamelen, linkpath, linkpathlen);
+        rc = dfs_cache_find(xid, tcon->ses, cifs_sb->local_nls,
+                            cifs_remap(cifs_sb), dfspath, NULL, NULL);
+        if (rc == 0) {
+                cifs_dbg(FYI, "DFS ref '%s' is found, emulate -EREMOTE\n",
+                         dfspath);
+                rc = -EREMOTE;
+        } else if (rc == -EEXIST) {
+                cifs_dbg(FYI, "DFS ref '%s' is not found, emulate -ENOENT\n",
+                         dfspath);
+                rc = -ENOENT;
+        } else {
+                cifs_dbg(FYI, "%s: dfs_cache_find returned %d\n", __func__, rc);
+        }
+        kfree(dfspath);
+        return rc;
+}
 #endif
diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c
index 43b16b6d108c19c92ebd42a54649239b7652507b..ebe236b9d9f56e35c1d1b65ad9038421f6b6d894 100644
--- a/fs/cifs/netmisc.c
+++ b/fs/cifs/netmisc.c
@@ -896,10 +896,7 @@ map_and_check_smb_error(struct mid_q_entry *mid, bool logErr)
                 if (class == ERRSRV && code == ERRbaduid) {
                         cifs_dbg(FYI, "Server returned 0x%x, reconnecting session...\n",
                                 code);
-                        spin_lock(&cifs_tcp_ses_lock);
-                        if (mid->server->tcpStatus != CifsExiting)
-                                mid->server->tcpStatus = CifsNeedReconnect;
-                        spin_unlock(&cifs_tcp_ses_lock);
+                        cifs_reconnect(mid->server, false);
                 }
         }
 
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index 6d242af536cbff1a5ed830ec281cdb349c5518a9..2984584042527a0a461472a1921ce3bde9d657b1 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -40,7 +40,7 @@
 #define NTLMSSP_REQUEST_NON_NT_KEY    0x400000
 #define NTLMSSP_NEGOTIATE_TARGET_INFO 0x800000
 /* #define reserved4                 0x1000000 */
-#define NTLMSSP_NEGOTIATE_VERSION    0x2000000 /* we do not set */
+#define NTLMSSP_NEGOTIATE_VERSION    0x2000000 /* we only set for SMB2+ */
 /* #define reserved3                 0x4000000 */
 /* #define reserved2                 0x8000000 */
 /* #define reserved1                0x10000000 */
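The quirk helper added to fs/cifs/misc.c above builds the referral path by gluing the tree name, minus one of its two leading backslashes per MS-DFSC, onto the link path before asking the DFS cache about it. A userspace sketch of just that path assembly; the function name and sample strings are invented for the demo:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Model of the path assembly in cifs_dfs_query_info_nonascii_quirk():
 * REQ_GET_DFS_REFERRAL paths carry exactly one leading backslash, so a
 * "\\server\share" tree name is advanced past one '\' before the link
 * path is appended. */
static char *build_dfs_path(const char *treename, const char *linkpath,
                            char sep)
{
        size_t tlen, llen;
        char *path;

        if (treename[0] == sep && treename[1] == sep)
                treename++;             /* "\\srv\share" -> "\srv\share" */

        tlen = strlen(treename);
        llen = strlen(linkpath);
        path = calloc(tlen + llen + 1, 1);
        if (!path)
                return NULL;
        memcpy(path, treename, tlen);
        memcpy(path + tlen, linkpath, llen);
        return path;
}

int main(void)
{
        char *p = build_dfs_path("\\\\srv\\docs", "\\каталог", '\\');

        if (p) {
                printf("%s\n", p);      /* \srv\docs\каталог */
                free(p);
        }
        return 0;
}

In the helper itself, dfs_cache_find() returning 0 then maps to -EREMOTE (the name really is a DFS link), and -EEXIST maps to -ENOENT (no such referral).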
@@ -87,6 +87,30 @@ typedef struct _NEGOTIATE_MESSAGE {
         /* followed by WorkstationString */
 } __attribute__((packed)) NEGOTIATE_MESSAGE, *PNEGOTIATE_MESSAGE;
 
+#define NTLMSSP_REVISION_W2K3 0x0F
+
+/* See MS-NLMP section 2.2.2.10 */
+struct ntlmssp_version {
+        __u8    ProductMajorVersion;
+        __u8    ProductMinorVersion;
+        __le16  ProductBuild; /* we send the cifs.ko module version here */
+        __u8    Reserved[3];
+        __u8    NTLMRevisionCurrent; /* currently 0x0F */
+} __packed;
+
+/* see MS-NLMP section 2.2.1.1 */
+struct negotiate_message {
+        __u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
+        __le32 MessageType;     /* NtLmNegotiate = 1 */
+        __le32 NegotiateFlags;
+        SECURITY_BUFFER DomainName;     /* RFC 1001 style and ASCII */
+        SECURITY_BUFFER WorkstationName;        /* RFC 1001 and ASCII */
+        struct  ntlmssp_version Version;
+        /* SECURITY_BUFFER */
+        char DomainString[0];
+        /* followed by WorkstationString */
+} __packed;
+
 typedef struct _CHALLENGE_MESSAGE {
         __u8 Signature[sizeof(NTLMSSP_SIGNATURE)];
         __le32 MessageType;   /* NtLmChallenge = 2 */
@@ -123,6 +147,10 @@ int build_ntlmssp_negotiate_blob(unsigned char **pbuffer, u16 *buflen,
                                  struct cifs_ses *ses,
                                  struct TCP_Server_Info *server,
                                  const struct nls_table *nls_cp);
+int build_ntlmssp_smb3_negotiate_blob(unsigned char **pbuffer, u16 *buflen,
+                                      struct cifs_ses *ses,
+                                      struct TCP_Server_Info *server,
+                                      const struct nls_table *nls_cp);
 int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
                             struct cifs_ses *ses,
                             struct TCP_Server_Info *server,
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index d12490e12be53ed18ca6e4f6718b96273e1891b1..5723d50340e5e8df4bfff941fa6d44716c1c3a2c 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -17,6 +17,8 @@
 #include "nterr.h"
 #include <linux/utsname.h>
 #include <linux/slab.h>
+#include <linux/version.h>
+#include "cifsfs.h"
 #include "cifs_spnego.h"
 #include "smb2proto.h"
 #include "fs_context.h"
@@ -65,6 +67,8 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface)
         return false;
 }
 
+/* Channel helper functions. It is assumed that chan_lock is held by the caller. */
+
 unsigned int
 cifs_ses_get_chan_index(struct cifs_ses *ses,
                         struct TCP_Server_Info *server)
@@ -134,10 +138,10 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
 
         left = ses->chan_max - ses->chan_count;
         if (left <= 0) {
+                spin_unlock(&ses->chan_lock);
                 cifs_dbg(FYI,
                          "ses already at max_channels (%zu), nothing to open\n",
                          ses->chan_max);
-                spin_unlock(&ses->chan_lock);
                 return 0;
         }
 
@@ -364,19 +368,6 @@ out:
         return rc;
 }
 
-/* Mark all session channels for reconnect */
-void cifs_ses_mark_for_reconnect(struct cifs_ses *ses)
-{
-        int i;
-
-        for (i = 0; i < ses->chan_count; i++) {
-                spin_lock(&cifs_tcp_ses_lock);
-                if (ses->chans[i].server->tcpStatus != CifsExiting)
-                        ses->chans[i].server->tcpStatus = CifsNeedReconnect;
-                spin_unlock(&cifs_tcp_ses_lock);
-        }
-}
-
 static __u32 cifs_ssetup_hdr(struct cifs_ses *ses,
                              struct TCP_Server_Info *server,
                              SESSION_SETUP_ANDX *pSMB)
@@ -722,7 +713,11 @@ static int size_of_ntlmssp_blob(struct cifs_ses *ses, int base_size)
         else
                 sz += sizeof(__le16);
 
-        sz += sizeof(__le16) * strnlen(ses->workstation_name, CIFS_MAX_WORKSTATION_LEN);
+        if (ses->workstation_name)
+                sz += sizeof(__le16) * strnlen(ses->workstation_name,
+                                               CIFS_MAX_WORKSTATION_LEN);
+        else
+                sz += sizeof(__le16);
 
         return sz;
 }
@@ -820,6 +815,74 @@ setup_ntlm_neg_ret:
         return rc;
 }
 
+/*
+ * Build an ntlmssp blob with additional fields, such as version,
+ * supported by modern servers. For safety, limit this to SMB3 or later.
+ * See the notes in MS-NLMP Section 2.2.2.1.
+ */ +int build_ntlmssp_smb3_negotiate_blob(unsigned char **pbuffer, + u16 *buflen, + struct cifs_ses *ses, + struct TCP_Server_Info *server, + const struct nls_table *nls_cp) +{ + int rc = 0; + struct negotiate_message *sec_blob; + __u32 flags; + unsigned char *tmp; + int len; + + len = size_of_ntlmssp_blob(ses, sizeof(struct negotiate_message)); + *pbuffer = kmalloc(len, GFP_KERNEL); + if (!*pbuffer) { + rc = -ENOMEM; + cifs_dbg(VFS, "Error %d during NTLMSSP allocation\n", rc); + *buflen = 0; + goto setup_ntlm_smb3_neg_ret; + } + sec_blob = (struct negotiate_message *)*pbuffer; + + memset(*pbuffer, 0, sizeof(struct negotiate_message)); + memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); + sec_blob->MessageType = NtLmNegotiate; + + /* BB is NTLMV2 session security format easier to use here? */ + flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | + NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | + NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC | + NTLMSSP_NEGOTIATE_ALWAYS_SIGN | NTLMSSP_NEGOTIATE_SEAL | + NTLMSSP_NEGOTIATE_SIGN | NTLMSSP_NEGOTIATE_VERSION; + if (!server->session_estab || ses->ntlmssp->sesskey_per_smbsess) + flags |= NTLMSSP_NEGOTIATE_KEY_XCH; + + sec_blob->Version.ProductMajorVersion = LINUX_VERSION_MAJOR; + sec_blob->Version.ProductMinorVersion = LINUX_VERSION_PATCHLEVEL; + sec_blob->Version.ProductBuild = cpu_to_le16(SMB3_PRODUCT_BUILD); + sec_blob->Version.NTLMRevisionCurrent = NTLMSSP_REVISION_W2K3; + + tmp = *pbuffer + sizeof(struct negotiate_message); + ses->ntlmssp->client_flags = flags; + sec_blob->NegotiateFlags = cpu_to_le32(flags); + + /* these fields should be null in negotiate phase MS-NLMP 3.1.5.1.1 */ + cifs_security_buffer_from_str(&sec_blob->DomainName, + NULL, + CIFS_MAX_DOMAINNAME_LEN, + *pbuffer, &tmp, + nls_cp); + + cifs_security_buffer_from_str(&sec_blob->WorkstationName, + NULL, + CIFS_MAX_WORKSTATION_LEN, + *pbuffer, &tmp, + nls_cp); + + *buflen = tmp - *pbuffer; +setup_ntlm_smb3_neg_ret: + return rc; +} + + int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen, struct cifs_ses *ses, @@ -1048,16 +1111,6 @@ sess_establish_session(struct sess_data *sess_data) mutex_unlock(&server->srv_mutex); cifs_dbg(FYI, "CIFS session established successfully\n"); - spin_lock(&ses->chan_lock); - cifs_chan_clear_need_reconnect(ses, server); - spin_unlock(&ses->chan_lock); - - /* Even if one channel is active, session is in good state */ - spin_lock(&cifs_tcp_ses_lock); - server->tcpStatus = CifsGood; - ses->status = CifsGood; - spin_unlock(&cifs_tcp_ses_lock); - return 0; } @@ -1413,7 +1466,7 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data) &blob_len, ses, server, sess_data->nls_cp); if (rc) - goto out; + goto out_free_ntlmsspblob; sess_data->iov[1].iov_len = blob_len; sess_data->iov[1].iov_base = ntlmsspblob; @@ -1421,7 +1474,7 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data) rc = _sess_auth_rawntlmssp_assemble_req(sess_data); if (rc) - goto out; + goto out_free_ntlmsspblob; rc = sess_sendreceive(sess_data); @@ -1435,14 +1488,14 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data) rc = 0; if (rc) - goto out; + goto out_free_ntlmsspblob; cifs_dbg(FYI, "rawntlmssp session setup challenge phase\n"); if (smb_buf->WordCount != 4) { rc = -EIO; cifs_dbg(VFS, "bad word count %d\n", smb_buf->WordCount); - goto out; + goto out_free_ntlmsspblob; } ses->Suid = smb_buf->Uid; /* UID left in wire format (le) */ @@ -1456,10 +1509,13 @@ sess_auth_rawntlmssp_negotiate(struct sess_data *sess_data) cifs_dbg(VFS, "bad 
security blob length %d\n", blob_len); rc = -EINVAL; - goto out; + goto out_free_ntlmsspblob; } rc = decode_ntlmssp_challenge(bcc_ptr, blob_len, ses); + +out_free_ntlmsspblob: + kfree(ntlmsspblob); out: sess_free_buffer(sess_data); @@ -1574,7 +1630,7 @@ out_free_ntlmsspblob: out: sess_free_buffer(sess_data); - if (!rc) + if (!rc) rc = sess_establish_session(sess_data); /* Cleanup */ diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 8272c91e15ef33719e955ea5919ab7bf51c37a8d..b2fb7bd1193666413f0196035aa1000ffc95efca 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -228,9 +228,7 @@ cifs_get_next_mid(struct TCP_Server_Info *server) spin_unlock(&GlobalMid_Lock); if (reconnect) { - spin_lock(&cifs_tcp_ses_lock); - server->tcpStatus = CifsNeedReconnect; - spin_unlock(&cifs_tcp_ses_lock); + cifs_mark_tcp_ses_conns_for_reconnect(server, false); } return mid; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 8d471df69c596960016187d83ceb885749ecc3e1..7e7909b1ae1180f836054d51a0da6bc918943231 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -244,10 +244,10 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, spin_unlock(&ses->chan_lock); return 0; } + spin_unlock(&ses->chan_lock); cifs_dbg(FYI, "sess reconnect mask: 0x%lx, tcon reconnect: %d", tcon->ses->chans_need_reconnect, tcon->need_reconnect); - spin_unlock(&ses->chan_lock); nls_codepage = load_nls_default(); @@ -289,14 +289,18 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, rc = -EHOSTDOWN; goto failed; } - } - - if (rc || !tcon->need_reconnect) { + } else { mutex_unlock(&ses->session_mutex); goto out; } + mutex_unlock(&ses->session_mutex); skip_sess_setup: + mutex_lock(&ses->session_mutex); + if (!tcon->need_reconnect) { + mutex_unlock(&ses->session_mutex); + goto out; + } cifs_mark_open_files_invalid(tcon); if (tcon->use_persistent) tcon->need_reopen_files = true; @@ -1382,17 +1386,6 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data) mutex_unlock(&server->srv_mutex); cifs_dbg(FYI, "SMB2/3 session established successfully\n"); - - spin_lock(&ses->chan_lock); - cifs_chan_clear_need_reconnect(ses, server); - spin_unlock(&ses->chan_lock); - - /* Even if one channel is active, session is in good state */ - spin_lock(&cifs_tcp_ses_lock); - server->tcpStatus = CifsGood; - ses->status = CifsGood; - spin_unlock(&cifs_tcp_ses_lock); - return rc; } @@ -1513,7 +1506,7 @@ SMB2_sess_auth_rawntlmssp_negotiate(struct SMB2_sess_data *sess_data) if (rc) goto out_err; - rc = build_ntlmssp_negotiate_blob(&ntlmssp_blob, + rc = build_ntlmssp_smb3_negotiate_blob(&ntlmssp_blob, &blob_length, ses, server, sess_data->nls_cp); if (rc) @@ -1920,10 +1913,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, tcon->share_flags = le32_to_cpu(rsp->ShareFlags); tcon->capabilities = rsp->Capabilities; /* we keep caps little endian */ tcon->maximal_access = le32_to_cpu(rsp->MaximalAccess); - spin_lock(&cifs_tcp_ses_lock); - tcon->tidStatus = CifsGood; - spin_unlock(&cifs_tcp_ses_lock); - tcon->need_reconnect = false; tcon->tid = le32_to_cpu(rsp->hdr.Id.SyncId.TreeId); strlcpy(tcon->treeName, tree, sizeof(tcon->treeName)); @@ -2587,8 +2576,13 @@ alloc_path_with_tree_prefix(__le16 **out_path, int *out_size, int *out_len, cp = load_nls_default(); cifs_strtoUTF16(*out_path, treename, treename_len, cp); - UniStrcat(*out_path, sep); - UniStrcat(*out_path, path); + + /* Do not append the separator if the path is empty */ + if (path[0] != cpu_to_le16(0x0000)) { + UniStrcat(*out_path, sep); + 
UniStrcat(*out_path, path); + } + unload_nls(cp); return 0; @@ -3782,27 +3776,35 @@ void smb2_reconnect_server(struct work_struct *work) { struct TCP_Server_Info *server = container_of(work, struct TCP_Server_Info, reconnect.work); - struct cifs_ses *ses; + struct TCP_Server_Info *pserver; + struct cifs_ses *ses, *ses2; struct cifs_tcon *tcon, *tcon2; - struct list_head tmp_list; - int tcon_exist = false; + struct list_head tmp_list, tmp_ses_list; + bool tcon_exist = false, ses_exist = false; + bool tcon_selected = false; int rc; - int resched = false; + bool resched = false; + /* If server is a channel, select the primary channel */ + pserver = CIFS_SERVER_IS_CHAN(server) ? server->primary_server : server; /* Prevent simultaneous reconnects that can corrupt tcon->rlist list */ - mutex_lock(&server->reconnect_mutex); + mutex_lock(&pserver->reconnect_mutex); INIT_LIST_HEAD(&tmp_list); - cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n"); + INIT_LIST_HEAD(&tmp_ses_list); + cifs_dbg(FYI, "Reconnecting tcons and channels\n"); spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) { + + tcon_selected = false; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { if (tcon->need_reconnect || tcon->need_reopen_files) { tcon->tc_count++; list_add_tail(&tcon->rlist, &tmp_list); - tcon_exist = true; + tcon_selected = tcon_exist = true; } } /* @@ -3811,15 +3813,27 @@ void smb2_reconnect_server(struct work_struct *work) */ if (ses->tcon_ipc && ses->tcon_ipc->need_reconnect) { list_add_tail(&ses->tcon_ipc->rlist, &tmp_list); - tcon_exist = true; + tcon_selected = tcon_exist = true; ses->ses_count++; } + /* + * handle the case where channel needs to reconnect + * binding session, but tcon is healthy (some other channel + * is active) + */ + spin_lock(&ses->chan_lock); + if (!tcon_selected && cifs_chan_needs_reconnect(ses, server)) { + list_add_tail(&ses->rlist, &tmp_ses_list); + ses_exist = true; + ses->ses_count++; + } + spin_unlock(&ses->chan_lock); } /* * Get the reference to server struct to be sure that the last call of * cifs_put_tcon() in the loop below won't release the server pointer. 
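That reference, like the ses->ses_count and tcon->tc_count bumps taken earlier while cifs_tcp_ses_lock was held, follows one pattern: pin the object under the lock, drop the lock, do the slow work, then release the pin. A minimal stand-alone model of the shape, with the locks shown as comments and all names invented:

#include <stdio.h>

/* Toy refcounted object standing in for TCP_Server_Info. */
struct server {
        int refs;
};

static void get_ref(struct server *s) { s->refs++; }

static void put_ref(struct server *s)
{
        if (--s->refs == 0)
                printf("last ref dropped, object freed\n");
}

int main(void)
{
        struct server srv = { .refs = 1 };
        int work_queued = 1;

        /* spin_lock(&list_lock); */
        if (work_queued)
                get_ref(&srv);          /* pin across the unlocked section */
        /* spin_unlock(&list_lock); */

        if (work_queued) {
                printf("reconnect work runs with refs=%d\n", srv.refs);
                put_ref(&srv);          /* matches the pin above */
        }
        put_ref(&srv);                  /* the original reference */
        return 0;
}

Without the pin, the last put from another thread could free the object while the reconnect work is still walking it.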
*/ - if (tcon_exist) + if (tcon_exist || ses_exist) server->srv_count++; spin_unlock(&cifs_tcp_ses_lock); @@ -3837,13 +3851,41 @@ void smb2_reconnect_server(struct work_struct *work) cifs_put_tcon(tcon); } - cifs_dbg(FYI, "Reconnecting tcons finished\n"); + if (!ses_exist) + goto done; + + /* allocate a dummy tcon struct used for reconnect */ + tcon = kzalloc(sizeof(struct cifs_tcon), GFP_KERNEL); + if (!tcon) { + resched = true; + list_del_init(&ses->rlist); + cifs_put_smb_ses(ses); + goto done; + } + + tcon->tidStatus = CifsGood; + tcon->retry = false; + tcon->need_reconnect = false; + + /* now reconnect sessions for necessary channels */ + list_for_each_entry_safe(ses, ses2, &tmp_ses_list, rlist) { + tcon->ses = ses; + rc = smb2_reconnect(SMB2_INTERNAL_CMD, tcon, server); + if (rc) + resched = true; + list_del_init(&ses->rlist); + cifs_put_smb_ses(ses); + } + kfree(tcon); + +done: + cifs_dbg(FYI, "Reconnecting tcons and channels finished\n"); if (resched) queue_delayed_work(cifsiod_wq, &server->reconnect, 2 * HZ); - mutex_unlock(&server->reconnect_mutex); + mutex_unlock(&pserver->reconnect_mutex); /* now we can safely release srv struct */ - if (tcon_exist) + if (tcon_exist || ses_exist) cifs_put_tcp_session(server, 1); } diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index b70a49b4edc0d9b19067765cce6bc680a396eccc..2af79093b78bcc2f7a7c60a4df0fb5c1fff83bc1 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -100,6 +100,7 @@ int smb2_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key) goto out; found: + spin_lock(&ses->chan_lock); if (cifs_chan_needs_reconnect(ses, server) && !CIFS_ALL_CHANS_NEED_RECONNECT(ses)) { /* @@ -108,6 +109,7 @@ found: * session key */ memcpy(key, ses->smb3signingkey, SMB3_SIGN_KEY_SIZE); + spin_unlock(&ses->chan_lock); goto out; } @@ -119,9 +121,11 @@ found: chan = ses->chans + i; if (chan->server == server) { memcpy(key, chan->signkey, SMB3_SIGN_KEY_SIZE); + spin_unlock(&ses->chan_lock); goto out; } } + spin_unlock(&ses->chan_lock); cifs_dbg(VFS, "%s: Could not find channel signing key for session 0x%llx\n", @@ -430,8 +434,10 @@ generate_smb3signingkey(struct cifs_ses *ses, return rc; /* safe to access primary channel, since it will never go away */ + spin_lock(&ses->chan_lock); memcpy(ses->chans[0].signkey, ses->smb3signingkey, SMB3_SIGN_KEY_SIZE); + spin_unlock(&ses->chan_lock); rc = generate_key(ses, ptriplet->encryption.label, ptriplet->encryption.context, diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 93f0e8c1ea233a360ad8576cb67db41c47054c53..a4c3e027cca2505c1ba2b0de913608673492180b 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -430,9 +430,7 @@ unmask: * be taken as the remainder of this one. 
We need to kill the * socket so the server throws away the partial SMB */ - spin_lock(&cifs_tcp_ses_lock); - server->tcpStatus = CifsNeedReconnect; - spin_unlock(&cifs_tcp_ses_lock); + cifs_mark_tcp_ses_conns_for_reconnect(server, false); trace_smb3_partial_send_reconnect(server->CurrentMid, server->conn_id, server->hostname); } @@ -729,17 +727,6 @@ static int allocate_mid(struct cifs_ses *ses, struct smb_hdr *in_buf, struct mid_q_entry **ppmidQ) { spin_lock(&cifs_tcp_ses_lock); - if (ses->server->tcpStatus == CifsExiting) { - spin_unlock(&cifs_tcp_ses_lock); - return -ENOENT; - } - - if (ses->server->tcpStatus == CifsNeedReconnect) { - spin_unlock(&cifs_tcp_ses_lock); - cifs_dbg(FYI, "tcp session dead - return to caller to retry\n"); - return -EAGAIN; - } - if (ses->status == CifsNew) { if ((in_buf->Command != SMB_COM_SESSION_SETUP_ANDX) && (in_buf->Command != SMB_COM_NEGOTIATE)) { @@ -1059,7 +1046,10 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) /* round robin */ index = (uint)atomic_inc_return(&ses->chan_seq); + + spin_lock(&ses->chan_lock); index %= ses->chan_count; + spin_unlock(&ses->chan_lock); return ses->chans[index].server; } diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 1466b5d01cbb9a4a56f18adf84c54f1afe168d45..d3cd2a94d1e8c3f6f4fa965cf7b6f9d17b35326c 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1780,8 +1780,8 @@ void configfs_unregister_group(struct config_group *group) configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); + d_drop(dentry); fsnotify_rmdir(d_inode(parent), dentry); - d_delete(dentry); inode_unlock(d_inode(parent)); dput(dentry); @@ -1922,10 +1922,10 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); - fsnotify_rmdir(d_inode(root), dentry); inode_unlock(d_inode(dentry)); - d_delete(dentry); + d_drop(dentry); + fsnotify_rmdir(d_inode(root), dentry); inode_unlock(d_inode(root)); diff --git a/fs/coredump.c b/fs/coredump.c index 7dece20b162b38fc6215f3ae38a4f64d62179d50..1c060c0a2d72f25651d85ba2eee6af7360228432 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -52,9 +53,9 @@ #include -int core_uses_pid; -unsigned int core_pipe_limit; -char core_pattern[CORENAME_MAX_SIZE] = "core"; +static int core_uses_pid; +static unsigned int core_pipe_limit; +static char core_pattern[CORENAME_MAX_SIZE] = "core"; static int core_name_size = CORENAME_MAX_SIZE; struct core_name { @@ -62,8 +63,6 @@ struct core_name { int used, size; }; -/* The maximal length of core_pattern is also specified in sysctl.c */ - static int expand_corename(struct core_name *cn, int size) { char *corename = krealloc(cn->corename, size, GFP_KERNEL); @@ -893,6 +892,63 @@ int dump_align(struct coredump_params *cprm, int align) } EXPORT_SYMBOL(dump_align); +#ifdef CONFIG_SYSCTL + +void validate_coredump_safety(void) +{ + if (suid_dumpable == SUID_DUMP_ROOT && + core_pattern[0] != '/' && core_pattern[0] != '|') { + pr_warn( +"Unsafe core_pattern used with fs.suid_dumpable=2.\n" +"Pipe handler or fully qualified core dump path required.\n" +"Set kernel.core_pattern before fs.suid_dumpable.\n" + ); + } +} + +static int proc_dostring_coredump(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int error = proc_dostring(table, write, buffer, lenp, ppos); + + if (!error) + validate_coredump_safety(); + return 
error; +} + +static struct ctl_table coredump_sysctls[] = { + { + .procname = "core_uses_pid", + .data = &core_uses_pid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "core_pattern", + .data = core_pattern, + .maxlen = CORENAME_MAX_SIZE, + .mode = 0644, + .proc_handler = proc_dostring_coredump, + }, + { + .procname = "core_pipe_limit", + .data = &core_pipe_limit, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; + +static int __init init_fs_coredump_sysctls(void) +{ + register_sysctl_init("kernel", coredump_sysctls); + return 0; +} +fs_initcall(init_fs_coredump_sysctls); +#endif /* CONFIG_SYSCTL */ + /* * The purpose of always_dump_vma() is to make sure that special kernel mappings * that are useful for post-mortem analysis are included in every core dump. diff --git a/fs/dcache.c b/fs/dcache.c index cf871a81f4fdcc5501463c3c3a35e4aac3c03eec..c84269c6e8bf6df3b9e53ed42bb450bd500e2afe 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -115,10 +115,13 @@ static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent, return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT); } - -/* Statistics gathering. */ -struct dentry_stat_t dentry_stat = { - .age_limit = 45, +struct dentry_stat_t { + long nr_dentry; + long nr_unused; + long age_limit; /* age in seconds */ + long want_pages; /* pages requested by system */ + long nr_negative; /* # of unused negative dentries */ + long dummy; /* Reserved for future use */ }; static DEFINE_PER_CPU(long, nr_dentry); @@ -126,6 +129,10 @@ static DEFINE_PER_CPU(long, nr_dentry_unused); static DEFINE_PER_CPU(long, nr_dentry_negative); #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) +/* Statistics gathering. */ +static struct dentry_stat_t dentry_stat = { + .age_limit = 45, +}; /* * Here we resort to our own counters instead of using generic per-cpu counters @@ -167,14 +174,32 @@ static long get_nr_dentry_negative(void) return sum < 0 ? 
0 : sum; } -int proc_nr_dentry(struct ctl_table *table, int write, void *buffer, - size_t *lenp, loff_t *ppos) +static int proc_nr_dentry(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { dentry_stat.nr_dentry = get_nr_dentry(); dentry_stat.nr_unused = get_nr_dentry_unused(); dentry_stat.nr_negative = get_nr_dentry_negative(); return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } + +static struct ctl_table fs_dcache_sysctls[] = { + { + .procname = "dentry-state", + .data = &dentry_stat, + .maxlen = 6*sizeof(long), + .mode = 0444, + .proc_handler = proc_nr_dentry, + }, + { } +}; + +static int __init init_fs_dcache_sysctls(void) +{ + register_sysctl_init("fs", fs_dcache_sysctls); + return 0; +} +fs_initcall(init_fs_dcache_sysctls); #endif /* diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 42e5a766d33c7d1fea3ca9c05be73639c38c421e..4f25015aa5342a332c76ad7d931a507784eea764 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -621,8 +621,8 @@ void devpts_pty_kill(struct dentry *dentry) dentry->d_fsdata = NULL; drop_nlink(dentry->d_inode); - fsnotify_unlink(d_inode(dentry->d_parent), dentry); d_drop(dentry); + fsnotify_unlink(d_inode(dentry->d_parent), dentry); dput(dentry); /* d_alloc_name() in devpts_pty_new() */ } diff --git a/fs/erofs/data.c b/fs/erofs/data.c index fa7ddb7ad98008722b45da5b9160638bb0cd03be..226a57c57ee6fb439e7341b6d2e37889ae8dc448 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -252,12 +252,10 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, return ret; iomap->offset = map.m_la; - if (flags & IOMAP_DAX) { + if (flags & IOMAP_DAX) iomap->dax_dev = mdev.m_daxdev; - iomap->offset += mdev.m_dax_part_off; - } else { + else iomap->bdev = mdev.m_bdev; - } iomap->length = map.m_llen; iomap->flags = 0; iomap->private = NULL; @@ -284,6 +282,8 @@ static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, } else { iomap->type = IOMAP_MAPPED; iomap->addr = mdev.m_pa; + if (flags & IOMAP_DAX) + iomap->addr += mdev.m_dax_part_off; } return 0; } diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 498b7666efe852b94c93cc3379ee2b4216d10a94..423bc1a61da5c0615472626c1a7e1770bd14fd52 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -810,68 +810,11 @@ static bool z_erofs_get_sync_decompress_policy(struct erofs_sb_info *sbi, return false; } -static void z_erofs_decompressqueue_work(struct work_struct *work); -static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, - bool sync, int bios) -{ - struct erofs_sb_info *const sbi = EROFS_SB(io->sb); - - /* wake up the caller thread for sync decompression */ - if (sync) { - unsigned long flags; - - spin_lock_irqsave(&io->u.wait.lock, flags); - if (!atomic_add_return(bios, &io->pending_bios)) - wake_up_locked(&io->u.wait); - spin_unlock_irqrestore(&io->u.wait.lock, flags); - return; - } - - if (atomic_add_return(bios, &io->pending_bios)) - return; - /* Use workqueue and sync decompression for atomic contexts only */ - if (in_atomic() || irqs_disabled()) { - queue_work(z_erofs_workqueue, &io->u.work); - /* enable sync decompression for readahead */ - if (sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO) - sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON; - return; - } - z_erofs_decompressqueue_work(&io->u.work); -} - static bool z_erofs_page_is_invalidated(struct page *page) { return !page->mapping && !z_erofs_is_shortlived_page(page); } -static void z_erofs_decompressqueue_endio(struct bio *bio) -{ 
- tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private); - struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t); - blk_status_t err = bio->bi_status; - struct bio_vec *bvec; - struct bvec_iter_all iter_all; - - bio_for_each_segment_all(bvec, bio, iter_all) { - struct page *page = bvec->bv_page; - - DBG_BUGON(PageUptodate(page)); - DBG_BUGON(z_erofs_page_is_invalidated(page)); - - if (err) - SetPageError(page); - - if (erofs_page_is_managed(EROFS_SB(q->sb), page)) { - if (!err) - SetPageUptodate(page); - unlock_page(page); - } - } - z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1); - bio_put(bio); -} - static int z_erofs_decompress_pcluster(struct super_block *sb, struct z_erofs_pcluster *pcl, struct page **pagepool) @@ -1123,6 +1066,35 @@ static void z_erofs_decompressqueue_work(struct work_struct *work) kvfree(bgq); } +static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, + bool sync, int bios) +{ + struct erofs_sb_info *const sbi = EROFS_SB(io->sb); + + /* wake up the caller thread for sync decompression */ + if (sync) { + unsigned long flags; + + spin_lock_irqsave(&io->u.wait.lock, flags); + if (!atomic_add_return(bios, &io->pending_bios)) + wake_up_locked(&io->u.wait); + spin_unlock_irqrestore(&io->u.wait.lock, flags); + return; + } + + if (atomic_add_return(bios, &io->pending_bios)) + return; + /* Use workqueue and sync decompression for atomic contexts only */ + if (in_atomic() || irqs_disabled()) { + queue_work(z_erofs_workqueue, &io->u.work); + /* enable sync decompression for readahead */ + if (sbi->opt.sync_decompress == EROFS_SYNC_DECOMPRESS_AUTO) + sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_FORCE_ON; + return; + } + z_erofs_decompressqueue_work(&io->u.work); +} + static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl, unsigned int nr, struct page **pagepool, @@ -1300,6 +1272,33 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl, qtail[JQ_BYPASS] = &pcl->next; } +static void z_erofs_decompressqueue_endio(struct bio *bio) +{ + tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private); + struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t); + blk_status_t err = bio->bi_status; + struct bio_vec *bvec; + struct bvec_iter_all iter_all; + + bio_for_each_segment_all(bvec, bio, iter_all) { + struct page *page = bvec->bv_page; + + DBG_BUGON(PageUptodate(page)); + DBG_BUGON(z_erofs_page_is_invalidated(page)); + + if (err) + SetPageError(page); + + if (erofs_page_is_managed(EROFS_SB(q->sb), page)) { + if (!err) + SetPageUptodate(page); + unlock_page(page); + } + } + z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1); + bio_put(bio); +} + static void z_erofs_submit_queue(struct super_block *sb, struct z_erofs_decompress_frontend *f, struct page **pagepool, diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 18d7fd1a50646039c29b6769cea3bbfd4a9d3766..361b1d6e4bf9e9ed3d1f6b2906ceaa0e77c4ce5a 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -630,6 +630,13 @@ static int z_erofs_do_map_blocks(struct inode *inode, if (endoff >= m.clusterofs) { m.headtype = m.type; map->m_la = (m.lcn << lclusterbits) | m.clusterofs; + /* + * For ztailpacking files, in order to inline data more + * effectively, special EOF lclusters are now supported + * which can have three parts at most. 
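The clamp that follows this comment stops a ztailpacking (tail-packed inline) file from reporting mapped bytes past EOF, since its special EOF lcluster spans a whole logical cluster on disk. A small stand-alone sketch of the arithmetic, under the assumption that for such files the mapping end is simply min(end, i_size); the function name is invented:

#include <stdint.h>
#include <stdio.h>

/* Clamp the mapped extent end to EOF for a tail-packed inline file. */
static uint64_t map_end(uint64_t lcluster_end, uint64_t i_size,
                        int ztailpacking)
{
        if (ztailpacking && lcluster_end > i_size)
                return i_size;
        return lcluster_end;
}

int main(void)
{
        /* A 4 KiB lcluster, but the file ends after 4000 bytes. */
        printf("%llu\n", (unsigned long long)map_end(4096, 4000, 1)); /* 4000 */
        printf("%llu\n", (unsigned long long)map_end(4096, 4000, 0)); /* 4096 */
        return 0;
}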
+ */ + if (ztailpacking && end > inode->i_size) + end = inode->i_size; break; } /* m.lcn should be >= 1 if endoff < m.clusterofs */ diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 06f4c5ae1451eff56e0e4f47ca90926af110155a..e2daa940ebce7c77ee836aca2a6e82a7deb80b18 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -307,7 +307,7 @@ static void unlist_file(struct epitems_head *head) static long long_zero; static long long_max = LONG_MAX; -struct ctl_table epoll_table[] = { +static struct ctl_table epoll_table[] = { { .procname = "max_user_watches", .data = &max_user_watches, @@ -319,6 +319,13 @@ struct ctl_table epoll_table[] = { }, { } }; + +static void __init epoll_sysctls_init(void) +{ + register_sysctl("fs/epoll", epoll_table); +} +#else +#define epoll_sysctls_init() do { } while (0) #endif /* CONFIG_SYSCTL */ static const struct file_operations eventpoll_fops; @@ -2378,6 +2385,7 @@ static int __init eventpoll_init(void) /* Allocates slab cache used to allocate "struct eppoll_entry" */ pwq_cache = kmem_cache_create("eventpoll_pwq", sizeof(struct eppoll_entry), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL); + epoll_sysctls_init(); ephead_cache = kmem_cache_create("ep_head", sizeof(struct epitems_head), 0, SLAB_PANIC|SLAB_ACCOUNT, NULL); diff --git a/fs/exec.c b/fs/exec.c index 3c3c366a9bcf581d718515409ad169e87ec46c5c..79f2c9483302df9bbf0f37f507022c28ae38c177 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include @@ -2099,3 +2100,37 @@ COMPAT_SYSCALL_DEFINE5(execveat, int, fd, argv, envp, flags); } #endif + +#ifdef CONFIG_SYSCTL + +static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + + if (!error) + validate_coredump_safety(); + return error; +} + +static struct ctl_table fs_exec_sysctls[] = { + { + .procname = "suid_dumpable", + .data = &suid_dumpable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_coredump, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, + }, + { } +}; + +static int __init init_fs_exec_sysctls(void) +{ + register_sysctl_init("fs", fs_exec_sysctls); + return 0; +} + +fs_initcall(init_fs_exec_sysctls); +#endif /* CONFIG_SYSCTL */ diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 5a35768d6149a151160f99dc4ba12b80c8d02b7e..57e82e25f8e2320329b77c0a8ffcb1884c0d2af7 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -139,7 +139,7 @@ fail: /* * Inode operation get_posix_acl(). * - * inode->i_mutex: don't care + * inode->i_rwsem: don't care */ struct posix_acl * ext4_get_acl(struct inode *inode, int type, bool rcu) @@ -183,7 +183,7 @@ ext4_get_acl(struct inode *inode, int type, bool rcu) /* * Set the access or default ACL of an inode. * - * inode->i_mutex: down unless called from ext4_new_inode + * inode->i_rwsem: down unless called from ext4_new_inode */ static int __ext4_set_acl(handle_t *handle, struct inode *inode, int type, @@ -271,8 +271,8 @@ out_stop: /* * Initialize the ACLs of a new inode. Called from ext4_new_inode. 
* - * dir->i_mutex: down - * inode->i_mutex: up (access to inode is still exclusive) + * dir->i_rwsem: down + * inode->i_rwsem: up (access to inode is still exclusive) */ int ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 71a3cdceaa030f91e2d28e3dc9deac96b817d529..bcd3b9bf8069b26d93836a1ed64fd4716f7a1674 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1028,7 +1028,7 @@ struct ext4_inode_info { /* * Extended attributes can be read independently of the main file - * data. Taking i_mutex even when reading would cause contention + * data. Taking i_rwsem even when reading would cause contention * between readers of EAs and writers of regular file data, so * instead we synchronize on xattr_sem when reading or changing * EAs. @@ -1750,6 +1750,7 @@ struct ext4_sb_info { spinlock_t s_fc_lock; struct buffer_head *s_fc_bh; struct ext4_fc_stats s_fc_stats; + tid_t s_fc_ineligible_tid; #ifdef CONFIG_EXT4_DEBUG int s_fc_debug_max_replay; #endif @@ -1795,10 +1796,7 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) enum { EXT4_MF_MNTDIR_SAMPLED, EXT4_MF_FS_ABORTED, /* Fatal error detected */ - EXT4_MF_FC_INELIGIBLE, /* Fast commit ineligible */ - EXT4_MF_FC_COMMITTING /* File system underoing a fast - * commit. - */ + EXT4_MF_FC_INELIGIBLE /* Fast commit ineligible */ }; static inline void ext4_set_mount_flag(struct super_block *sb, int bit) @@ -2485,7 +2483,7 @@ struct ext4_filename { #ifdef CONFIG_FS_ENCRYPTION struct fscrypt_str crypto_buf; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) struct fscrypt_str cf_name; #endif }; @@ -2721,7 +2719,7 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb, struct ext4_group_desc *gdp); ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) extern int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname, struct ext4_filename *fname); @@ -2754,7 +2752,7 @@ static inline int ext4_fname_setup_filename(struct inode *dir, ext4_fname_from_fscrypt_name(fname, &name); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, iname, fname); #endif return err; @@ -2773,7 +2771,7 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, ext4_fname_from_fscrypt_name(fname, &name); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname); #endif return err; @@ -2790,7 +2788,7 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) fname->usr_fname = NULL; fname->disk_name.name = NULL; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) kfree(fname->cf_name.name); fname->cf_name.name = NULL; #endif @@ -2806,7 +2804,7 @@ static inline int ext4_fname_setup_filename(struct inode *dir, fname->disk_name.name = (unsigned char *) iname->name; fname->disk_name.len = iname->len; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) err = ext4_fname_setup_ci_filename(dir, iname, fname); #endif @@ -2822,7 +2820,7 @@ static inline int ext4_fname_prepare_lookup(struct inode *dir, static inline void ext4_fname_free_filename(struct ext4_filename *fname) { -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) kfree(fname->cf_name.name); fname->cf_name.name = NULL; #endif @@ -2926,7 +2924,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode, struct dentry *dentry); void ext4_fc_track_create(handle_t *handle, struct dentry *dentry); 
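The CONFIG_UNICODE hunks in this header swap `#ifdef CONFIG_UNICODE` for `#if IS_ENABLED(CONFIG_UNICODE)` because UTF-8 casefolding support can now be built as a module: `#ifdef` only sees the =y case, while IS_ENABLED() is true for both =y and =m. A compilable re-derivation of the kernel's IS_ENABLED() trick, simplified from include/linux/kconfig.h:

#include <stdio.h>

/* Pretend Kconfig output: a built-in option would define CONFIG_UNICODE
 * as 1; a modular one defines CONFIG_UNICODE_MODULE instead.  Swap the
 * define below to watch IS_ENABLED() follow along. */
#define CONFIG_UNICODE_MODULE 1

#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(ignored, val, ...) val
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define __is_defined(x) ___is_defined(x)
#define IS_BUILTIN(option) __is_defined(option)
#define IS_MODULE(option) __is_defined(option##_MODULE)
#define IS_ENABLED(option) (IS_BUILTIN(option) || IS_MODULE(option))

int main(void)
{
        int via_ifdef = 0;

#ifdef CONFIG_UNICODE
        via_ifdef = 1;          /* only reached for a built-in (=y) option */
#endif
        printf("#ifdef CONFIG_UNICODE      -> %d\n", via_ifdef);
        printf("IS_ENABLED(CONFIG_UNICODE) -> %d\n",
               IS_ENABLED(CONFIG_UNICODE));
        return 0;
}

With the module define active, `#ifdef` reports 0 while IS_ENABLED() reports 1 — exactly the mismatch these hunks fix for ext4's casefolded-name handling.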
void ext4_fc_track_inode(handle_t *handle, struct inode *inode); -void ext4_fc_mark_ineligible(struct super_block *sb, int reason); +void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle); void ext4_fc_start_update(struct inode *inode); void ext4_fc_stop_update(struct inode *inode); void ext4_fc_del(struct inode *inode); @@ -2935,6 +2933,9 @@ void ext4_fc_replay_cleanup(struct super_block *sb); int ext4_fc_commit(journal_t *journal, tid_t commit_tid); int __init ext4_fc_init_dentry_cache(void); void ext4_fc_destroy_dentry_cache(void); +int ext4_fc_record_regions(struct super_block *sb, int ino, + ext4_lblk_t lblk, ext4_fsblk_t pblk, + int len, int replay); /* mballoc.c */ extern const struct seq_operations ext4_mb_seq_groups_ops; @@ -3407,7 +3408,7 @@ do { \ #define EXT4_FREECLUSTERS_WATERMARK 0 #endif -/* Update i_disksize. Requires i_mutex to avoid races with truncate */ +/* Update i_disksize. Requires i_rwsem to avoid races with truncate */ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) { WARN_ON_ONCE(S_ISREG(inode->i_mode) && @@ -3418,7 +3419,7 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) up_write(&EXT4_I(inode)->i_data_sem); } -/* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */ +/* Update i_size, i_disksize. Requires i_rwsem to avoid races with truncate */ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize) { int changed = 0; diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 0e4fa644df018695aecb9ddc50bee7807fdfc953..db2ae4a2b38d8f4526674a2702e5b2f99a86c262 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -491,7 +491,7 @@ static inline int ext4_free_data_revoke_credits(struct inode *inode, int blocks) /* * This function controls whether or not we should try to go down the * dioread_nolock code paths, which makes it safe to avoid taking - * i_mutex for direct I/O reads. This only works for extent-based + * i_rwsem for direct I/O reads. This only works for extent-based * files, and it doesn't work if data journaling is enabled, since the * dioread_nolock code uses b_private to pass information back to the * I/O completion handler, and this conflicts with the jbd's use of diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 74c91da585d7f9f7e1e775c78f269dabf71f5997..c0f3f83e0c1b1f6a5ec5f45f2d0885810f30d1dc 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -97,7 +97,7 @@ static int ext4_ext_trunc_restart_fn(struct inode *inode, int *dropped) * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this * moment, get_block can be called only for blocks inside i_size since * page cache has been already dropped and writes are blocked by - * i_mutex. So we can safely drop the i_data_sem here. + * i_rwsem. So we can safely drop the i_data_sem here. 
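The comment below, renamed here from i_mutex to i_rwsem but otherwise unchanged, describes a classic escape hatch: the blocking journal restart runs with the inner lock dropped, which is only safe because the outer lock still excludes new writers. A toy stand-alone model of the drop-restart-retake sequence, with all names invented:

#include <stdio.h>

/* inner/outer stand in for i_data_sem and i_rwsem; journal_restart()
 * stands in for ext4_journal_restart(), which may block and therefore
 * must not be called with the inner lock held. */
struct locks {
        int outer_held;
        int inner_held;
};

static void journal_restart(const struct locks *l)
{
        if (l->inner_held)
                printf("BUG: would deadlock against the block mapper\n");
        else
                printf("journal handle restarted safely\n");
}

int main(void)
{
        struct locks l = { .outer_held = 1, .inner_held = 1 };

        l.inner_held = 0;       /* up_write(&ei->i_data_sem) */
        journal_restart(&l);    /* may block; writers still fenced out
                                 * by the outer lock */
        l.inner_held = 1;       /* down_write(&ei->i_data_sem) */
        printf("outer=%d inner=%d\n", l.outer_held, l.inner_held);
        return 0;
}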
*/ BUG_ON(EXT4_JOURNAL(inode) == NULL); ext4_discard_preallocations(inode, 0); @@ -4572,7 +4572,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT; - /* Wait all existing dio workers, newcomers will block on i_mutex */ + /* Wait all existing dio workers, newcomers will block on i_rwsem */ inode_dio_wait(inode); /* Preallocate the range including the unaligned edges */ @@ -4738,7 +4738,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) goto out; } - /* Wait all existing dio workers, newcomers will block on i_mutex */ + /* Wait all existing dio workers, newcomers will block on i_rwsem */ inode_dio_wait(inode); ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, flags); @@ -5334,7 +5334,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) ret = PTR_ERR(handle); goto out_mmap; } - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); down_write(&EXT4_I(inode)->i_data_sem); ext4_discard_preallocations(inode, 0); @@ -5474,7 +5474,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) ret = PTR_ERR(handle); goto out_mmap; } - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle); /* Expand file to avoid data loss if there is error while shifting */ inode->i_size += len; @@ -5571,7 +5571,7 @@ out_mutex: * stuff such as page-cache locking consistency, bh mapping consistency or * extent's data copying must be performed by caller. * Locking: - * i_mutex is held for both inodes + * i_rwsem is held for both inodes * i_data_sem is locked for write for both inodes * Assumptions: * All pages from requested range are locked for both inodes @@ -6091,11 +6091,15 @@ int ext4_ext_clear_bb(struct inode *inode) ext4_mb_mark_bb(inode->i_sb, path[j].p_block, 1, 0); + ext4_fc_record_regions(inode->i_sb, inode->i_ino, + 0, path[j].p_block, 1, 1); } ext4_ext_drop_refs(path); kfree(path); } ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); + ext4_fc_record_regions(inode->i_sb, inode->i_ino, + map.m_lblk, map.m_pblk, map.m_len, 1); } cur = cur + map.m_len; } diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 5ae8026a0c56294de77d19a8ea88615d41cd8d62..7964ee34e322af3390256d6f4812793f65475663 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -300,18 +300,32 @@ restart: } /* - * Mark file system as fast commit ineligible. This means that next commit - * operation would result in a full jbd2 commit. + * Mark file system as fast commit ineligible, and record latest + * ineligible transaction tid. This means until the recorded + * transaction, commit operation would result in a full jbd2 commit. */ -void ext4_fc_mark_ineligible(struct super_block *sb, int reason) +void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle) { struct ext4_sb_info *sbi = EXT4_SB(sb); + tid_t tid; if (!test_opt2(sb, JOURNAL_FAST_COMMIT) || (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY)) return; ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); + if (handle && !IS_ERR(handle)) + tid = handle->h_transaction->t_tid; + else { + read_lock(&sbi->s_journal->j_state_lock); + tid = sbi->s_journal->j_running_transaction ? 
+ sbi->s_journal->j_running_transaction->t_tid : 0; + read_unlock(&sbi->s_journal->j_state_lock); + } + spin_lock(&sbi->s_fc_lock); + if (sbi->s_fc_ineligible_tid < tid) + sbi->s_fc_ineligible_tid = tid; + spin_unlock(&sbi->s_fc_lock); WARN_ON(reason >= EXT4_FC_REASON_MAX); sbi->s_fc_stats.fc_ineligible_reason_count[reason]++; } @@ -361,7 +375,8 @@ static int ext4_fc_track_template( spin_lock(&sbi->s_fc_lock); if (list_empty(&EXT4_I(inode)->i_fc_list)) list_add_tail(&EXT4_I(inode)->i_fc_list, - (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) ? + (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || + sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) ? &sbi->s_fc_q[FC_Q_STAGING] : &sbi->s_fc_q[FC_Q_MAIN]); spin_unlock(&sbi->s_fc_lock); @@ -387,7 +402,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) mutex_unlock(&ei->i_fc_lock); node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS); if (!node) { - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL); mutex_lock(&ei->i_fc_lock); return -ENOMEM; } @@ -400,7 +415,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) if (!node->fcd_name.name) { kmem_cache_free(ext4_fc_dentry_cachep, node); ext4_fc_mark_ineligible(inode->i_sb, - EXT4_FC_REASON_NOMEM); + EXT4_FC_REASON_NOMEM, NULL); mutex_lock(&ei->i_fc_lock); return -ENOMEM; } @@ -414,7 +429,8 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update) node->fcd_name.len = dentry->d_name.len; spin_lock(&sbi->s_fc_lock); - if (ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_COMMITTING)) + if (sbi->s_journal->j_flags & JBD2_FULL_COMMIT_ONGOING || + sbi->s_journal->j_flags & JBD2_FAST_COMMIT_ONGOING) list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_STAGING]); else @@ -502,7 +518,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode) if (ext4_should_journal_data(inode)) { ext4_fc_mark_ineligible(inode->i_sb, - EXT4_FC_REASON_INODE_JOURNAL_DATA); + EXT4_FC_REASON_INODE_JOURNAL_DATA, handle); return; } @@ -879,7 +895,6 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal) int ret = 0; spin_lock(&sbi->s_fc_lock); - ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING); list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) { ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING); while (atomic_read(&ei->i_fc_updates)) { @@ -1179,7 +1194,7 @@ fallback: * Fast commit cleanup routine. This is called after every fast commit and * full commit. full is true if we are called after a full commit. 
 */
-static void ext4_fc_cleanup(journal_t *journal, int full)
+static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
 {
         struct super_block *sb = journal->j_private;
         struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1197,7 +1212,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
                 list_del_init(&iter->i_fc_list);
                 ext4_clear_inode_state(&iter->vfs_inode,
                                        EXT4_STATE_FC_COMMITTING);
-                ext4_fc_reset_inode(&iter->vfs_inode);
+                if (iter->i_sync_tid <= tid)
+                        ext4_fc_reset_inode(&iter->vfs_inode);
                 /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
                 smp_mb();
 #if (BITS_PER_LONG < 64)
@@ -1226,8 +1242,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
         list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
                                 &sbi->s_fc_q[FC_Q_MAIN]);
 
-        ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
-        ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+        if (tid >= sbi->s_fc_ineligible_tid) {
+                sbi->s_fc_ineligible_tid = 0;
+                ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+        }
 
         if (full)
                 sbi->s_fc_bytes = 0;
@@ -1392,14 +1410,15 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
                 if (state->fc_modified_inodes[i] == ino)
                         return 0;
         if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
-                state->fc_modified_inodes_size +=
-                        EXT4_FC_REPLAY_REALLOC_INCREMENT;
                 state->fc_modified_inodes = krealloc(
-                                state->fc_modified_inodes, sizeof(int) *
-                                state->fc_modified_inodes_size,
-                                GFP_KERNEL);
+                                state->fc_modified_inodes,
+                                sizeof(int) * (state->fc_modified_inodes_size +
+                                EXT4_FC_REPLAY_REALLOC_INCREMENT),
+                                GFP_KERNEL);
                 if (!state->fc_modified_inodes)
                         return -ENOMEM;
+                state->fc_modified_inodes_size +=
+                        EXT4_FC_REPLAY_REALLOC_INCREMENT;
         }
         state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
         return 0;
@@ -1431,7 +1450,9 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl,
         }
         inode = NULL;
 
-        ext4_fc_record_modified_inode(sb, ino);
+        ret = ext4_fc_record_modified_inode(sb, ino);
+        if (ret)
+                goto out;
 
         raw_fc_inode = (struct ext4_inode *)
                 (val + offsetof(struct ext4_fc_inode, fc_raw_inode));
@@ -1563,16 +1584,23 @@ out:
 }
 
 /*
- * Record physical disk regions which are in use as per fast commit area. Our
- * simple replay phase allocator excludes these regions from allocation.
+ * Record physical disk regions which are in use as per fast commit area,
+ * and used by inodes during replay phase. Our simple replay phase
+ * allocator excludes these regions from allocation.
  */
-static int ext4_fc_record_regions(struct super_block *sb, int ino,
-                ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
+int ext4_fc_record_regions(struct super_block *sb, int ino,
+                ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay)
 {
         struct ext4_fc_replay_state *state;
         struct ext4_fc_alloc_region *region;
 
         state = &EXT4_SB(sb)->s_fc_replay_state;
+        /*
+         * During the replay phase, fc_regions_valid may not be the same as
+         * fc_regions_used; bring them back in sync when new regions are added.
+ */ + if (replay && state->fc_regions_used != state->fc_regions_valid) + state->fc_regions_used = state->fc_regions_valid; if (state->fc_regions_used == state->fc_regions_size) { state->fc_regions_size += EXT4_FC_REPLAY_REALLOC_INCREMENT; @@ -1590,6 +1618,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino, region->pblk = pblk; region->len = len; + if (replay) + state->fc_regions_valid++; + return 0; } @@ -1621,6 +1652,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, } ret = ext4_fc_record_modified_inode(sb, inode->i_ino); + if (ret) + goto out; start = le32_to_cpu(ex->ee_block); start_pblk = ext4_ext_pblock(ex); @@ -1638,18 +1671,14 @@ static int ext4_fc_replay_add_range(struct super_block *sb, map.m_pblk = 0; ret = ext4_map_blocks(NULL, inode, &map, 0); - if (ret < 0) { - iput(inode); - return 0; - } + if (ret < 0) + goto out; if (ret == 0) { /* Range is not mapped */ path = ext4_find_extent(inode, cur, NULL, 0); - if (IS_ERR(path)) { - iput(inode); - return 0; - } + if (IS_ERR(path)) + goto out; memset(&newex, 0, sizeof(newex)); newex.ee_block = cpu_to_le32(cur); ext4_ext_store_pblock( @@ -1663,10 +1692,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, up_write((&EXT4_I(inode)->i_data_sem)); ext4_ext_drop_refs(path); kfree(path); - if (ret) { - iput(inode); - return 0; - } + if (ret) + goto out; goto next; } @@ -1679,10 +1706,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, ext4_ext_is_unwritten(ex), start_pblk + cur - start); - if (ret) { - iput(inode); - return 0; - } + if (ret) + goto out; /* * Mark the old blocks as free since they aren't used * anymore. We maintain an array of all the modified @@ -1702,10 +1727,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, ext4_ext_is_unwritten(ex), map.m_pblk); ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, ext4_ext_is_unwritten(ex), map.m_pblk); - if (ret) { - iput(inode); - return 0; - } + if (ret) + goto out; /* * We may have split the extent tree while toggling the state. * Try to shrink the extent tree now. 
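The add-range replay path above (and the del-range path that follows) now funnels every early exit through a single `out:` label that does the iput(), instead of repeating the iput-and-return pair at each failure site. A stand-alone model of why that single-exit shape keeps the reference count balanced on both the error and success paths; the types and names are invented:

#include <stdio.h>

struct inode {
        int refs;
};

static void iput(struct inode *i) { i->refs--; }

/* simulate_err stands in for a failing helper such as
 * ext4_fc_record_modified_inode() or ext4_map_blocks(). */
static int replay_one(struct inode *inode, int simulate_err)
{
        int ret = simulate_err;

        if (ret)
                goto out;

        printf("replayed extent range\n");      /* the main work */
out:
        iput(inode);                            /* runs on every path */
        return ret;
}

int main(void)
{
        struct inode ino = { .refs = 1 };

        replay_one(&ino, 0);
        printf("refs=%d (balanced)\n", ino.refs);
        return 0;
}

With scattered exits, it only takes one forgotten iput() for the inode to be pinned forever; the label makes the cleanup impossible to miss.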
@@ -1717,6 +1740,7 @@ next: } ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> sb->s_blocksize_bits); +out: iput(inode); return 0; } @@ -1746,6 +1770,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, } ret = ext4_fc_record_modified_inode(sb, inode->i_ino); + if (ret) + goto out; jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n", inode->i_ino, le32_to_cpu(lrange.fc_lblk), @@ -1755,10 +1781,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, map.m_len = remaining; ret = ext4_map_blocks(NULL, inode, &map, 0); - if (ret < 0) { - iput(inode); - return 0; - } + if (ret < 0) + goto out; if (ret > 0) { remaining -= ret; cur += ret; @@ -1770,18 +1794,17 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, } down_write(&EXT4_I(inode)->i_data_sem); - ret = ext4_ext_remove_space(inode, lrange.fc_lblk, - lrange.fc_lblk + lrange.fc_len - 1); + ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk), + le32_to_cpu(lrange.fc_lblk) + + le32_to_cpu(lrange.fc_len) - 1); up_write(&EXT4_I(inode)->i_data_sem); - if (ret) { - iput(inode); - return 0; - } + if (ret) + goto out; ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> sb->s_blocksize_bits); ext4_mark_inode_dirty(NULL, inode); +out: iput(inode); - return 0; } @@ -1937,7 +1960,7 @@ static int ext4_fc_replay_scan(journal_t *journal, ret = ext4_fc_record_regions(sb, le32_to_cpu(ext.fc_ino), le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), - ext4_ext_get_actual_len(ex)); + ext4_ext_get_actual_len(ex), 0); if (ret < 0) break; ret = JBD2_FC_REPLAY_CONTINUE; diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index f34f4176c1e7c24e921ea9b0dde96e422e3edfe4..147b5241dd94fe844ceb2637a8bc68dba428187f 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -290,7 +290,7 @@ static int __ext4fs_dirhash(const struct inode *dir, const char *name, int len, int ext4fs_dirhash(const struct inode *dir, const char *name, int len, struct dx_hash_info *hinfo) { -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) const struct unicode_map *um = dir->i_sb->s_encoding; int r, dlen; unsigned char *buff; diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 89efa78ed4b21c9b19d4842b27727f81398121df..07a8c75b65edc5f064319c531e1352f831617e60 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -696,7 +696,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode, * Drop i_data_sem to avoid deadlock with ext4_map_blocks. At this * moment, get_block can be called only for blocks inside i_size since * page cache has been already dropped and writes are blocked by - * i_mutex. So we can safely drop the i_data_sem here. + * i_rwsem. So we can safely drop the i_data_sem here. 
*/ BUG_ON(EXT4_JOURNAL(inode) == NULL); ext4_discard_preallocations(inode, 0); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 635bcf68a67eca5a29f95e3d9ff6e8cc125e63a2..e429418036050edb6a4e0c5d0deeeec5e313b1fb 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -911,7 +911,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping, struct page **pagep, void **fsdata) { - int ret, inline_size; + int ret; handle_t *handle; struct page *page; struct ext4_iloc iloc; @@ -928,14 +928,9 @@ retry_journal: goto out; } - inline_size = ext4_get_max_inline_size(inode); - - ret = -ENOSPC; - if (inline_size >= pos + len) { - ret = ext4_prepare_inline_data(handle, inode, pos + len); - if (ret && ret != -ENOSPC) - goto out_journal; - } + ret = ext4_prepare_inline_data(handle, inode, pos + len); + if (ret && ret != -ENOSPC) + goto out_journal; /* * We cannot recurse into the filesystem as the transaction @@ -1133,7 +1128,15 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode, struct ext4_iloc *iloc, void *buf, int inline_size) { - ext4_create_inline_data(handle, inode, inline_size); + int ret; + + ret = ext4_create_inline_data(handle, inode, inline_size); + if (ret) { + ext4_msg(inode->i_sb, KERN_EMERG, + "error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)", + inode->i_ino, ret); + return; + } ext4_write_inline_data(inode, iloc, buf, 0, inline_size); ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5f79d265d06a023d7204ea773e59ffc852f2c40a..01c9e4f743ba9a6ed0b03c283574fd1ffb3632e2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -338,7 +338,7 @@ stop_handle: return; no_delete: if (!list_empty(&EXT4_I(inode)->i_fc_list)) - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL); ext4_clear_inode(inode); /* We must guarantee clearing of inode... */ } @@ -1224,7 +1224,7 @@ retry_journal: /* * __block_write_begin may have instantiated a few blocks * outside i_size. Trim these off again. Don't need - * i_size_read because we hold i_mutex. + * i_size_read because we hold i_rwsem. * * Add inode to orphan list in case we crash before * truncate finishes @@ -3979,7 +3979,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) } - /* Wait all existing dio workers, newcomers will block on i_mutex */ + /* Wait all existing dio workers, newcomers will block on i_rwsem */ inode_dio_wait(inode); /* @@ -4129,7 +4129,7 @@ int ext4_truncate(struct inode *inode) /* * There is a possibility that we're either freeing the inode * or it's a completely new inode. In those cases we might not - * have i_mutex locked because it's not necessary. + * have i_rwsem locked because it's not necessary. */ if (!(inode->i_state & (I_NEW|I_FREEING))) WARN_ON(!inode_is_locked(inode)); @@ -5271,7 +5271,7 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) * transaction are already on disk (truncate waits for pages under * writeback). * - * Called with inode->i_mutex down. + * Called with inode->i_rwsem down. 
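"Called with inode->i_rwsem down" is a caller-side contract, which is why ext4_truncate() in an earlier hunk can WARN when the lock is absent rather than taking it itself. A toy model of asserting such a held-by-caller contract; the names are invented, and the kernel uses inode_lock()/inode_is_locked() for the real thing:

#include <assert.h>
#include <stdio.h>

struct inode {
        int rwsem_held;
};

/* Like ext4_setattr(): does not take the lock, merely requires it. */
static void my_setattr(struct inode *inode)
{
        assert(inode->rwsem_held);      /* ~ WARN_ON(!inode_is_locked()) */
        printf("attributes updated under i_rwsem\n");
}

int main(void)
{
        struct inode ino = { 0 };

        ino.rwsem_held = 1;             /* inode_lock(&ino)   */
        my_setattr(&ino);
        ino.rwsem_held = 0;             /* inode_unlock(&ino) */
        return 0;
}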
*/ int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *attr) @@ -5983,7 +5983,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) return PTR_ERR(handle); ext4_fc_mark_ineligible(inode->i_sb, - EXT4_FC_REASON_JOURNAL_FLAG_CHANGE); + EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle); err = ext4_mark_inode_dirty(handle, inode); ext4_handle_sync(handle); ext4_journal_stop(handle); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index bbbedf27b71c4adcf5d098d9b61476fb8edc11c4..a8022c2c6a5823fde76e170fa2797e90bc7e8a75 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -411,7 +411,7 @@ static long swap_inode_boot_loader(struct super_block *sb, err = -EINVAL; goto err_out; } - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle); /* Protect extent tree against block allocations via delalloc */ ext4_double_down_write_data_sem(inode, inode_bl); @@ -1373,7 +1373,7 @@ mext_out: err = ext4_resize_fs(sb, n_blocks_count); if (EXT4_SB(sb)->s_journal) { - ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE); + ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL); jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index cf2fd9fc7d986a16ed502c3d8bf19582674c487d..67ac95c4cd9b8308e9ab60d2fbaea55514d9c0ca 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2834,7 +2834,7 @@ out: static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); ext4_group_t group; if (*pos < 0 || *pos >= ext4_get_groups_count(sb)) @@ -2845,7 +2845,7 @@ static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); ext4_group_t group; ++*pos; @@ -2857,7 +2857,7 @@ static void *ext4_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos) static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); ext4_group_t group = (ext4_group_t) ((unsigned long) v); int i; int err, buddy_loaded = 0; @@ -2985,7 +2985,7 @@ int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset) static void *ext4_mb_seq_structs_summary_start(struct seq_file *seq, loff_t *pos) __acquires(&EXT4_SB(sb)->s_mb_rb_lock) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); unsigned long position; read_lock(&EXT4_SB(sb)->s_mb_rb_lock); @@ -2998,7 +2998,7 @@ __acquires(&EXT4_SB(sb)->s_mb_rb_lock) static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, loff_t *pos) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); unsigned long position; ++*pos; @@ -3010,7 +3010,7 @@ static void *ext4_mb_seq_structs_summary_next(struct seq_file *seq, void *v, lof static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); struct ext4_sb_info 
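In the mballoc hunk above, ext4_mb_new_blocks_simple() previously gave up on an entire block group as soon as the first free bit it found was on the fast-commit excluded list, and it also bounded the search by sb->s_blocksize instead of EXT4_CLUSTERS_PER_GROUP(sb). A compressed userspace sketch of the corrected retry loop (simplified bitmap helpers, illustrative only):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Simplified byte-wise scan, standing in for mb_find_next_zero_bit() */
static size_t find_next_zero(const unsigned char *map, size_t max, size_t start)
{
	for (size_t i = start; i < max; i++)
		if (!(map[i / 8] & (1u << (i % 8))))
			return i;
	return max;			/* nothing free in this group */
}

/* Mirror of the fixed loop: retry within the group past excluded bits
 * instead of abandoning the whole group on the first exclusion. */
static size_t first_usable(const unsigned char *map, size_t max,
			   bool (*excluded)(size_t))
{
	size_t i = 0;

	while (1) {
		i = find_next_zero(map, max, i);
		if (i >= max)
			break;		/* caller moves to the next group */
		if (!excluded(i))
			break;		/* usable bit found */
		i++;			/* skip only the excluded bit */
	}
	return i;
}

static bool is_excluded(size_t bit)
{
	return bit == 3;		/* pretend bit 3 is on the excluded list */
}

int main(void)
{
	unsigned char map[1] = { 0x07 };	/* bits 0-2 in use */

	printf("%zu\n", first_usable(map, 8, is_excluded));	/* 4, not "group full" */
	return 0;
}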
*sbi = EXT4_SB(sb); unsigned long position = ((unsigned long) v); struct ext4_group_info *grp; @@ -3058,7 +3058,7 @@ static int ext4_mb_seq_structs_summary_show(struct seq_file *seq, void *v) static void ext4_mb_seq_structs_summary_stop(struct seq_file *seq, void *v) __releases(&EXT4_SB(sb)->s_mb_rb_lock) { - struct super_block *sb = PDE_DATA(file_inode(seq->file)); + struct super_block *sb = pde_data(file_inode(seq->file)); read_unlock(&EXT4_SB(sb)->s_mb_rb_lock); } @@ -5753,7 +5753,8 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle, struct super_block *sb = ar->inode->i_sb; ext4_group_t group; ext4_grpblk_t blkoff; - int i = sb->s_blocksize; + ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); + ext4_grpblk_t i = 0; ext4_fsblk_t goal, block; struct ext4_super_block *es = EXT4_SB(sb)->s_es; @@ -5775,19 +5776,26 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle, ext4_get_group_no_and_offset(sb, max(ext4_group_first_block_no(sb, group), goal), NULL, &blkoff); - i = mb_find_next_zero_bit(bitmap_bh->b_data, sb->s_blocksize, + while (1) { + i = mb_find_next_zero_bit(bitmap_bh->b_data, max, blkoff); + if (i >= max) + break; + if (ext4_fc_replay_check_excluded(sb, + ext4_group_first_block_no(sb, group) + i)) { + blkoff = i + 1; + } else + break; + } brelse(bitmap_bh); - if (i >= sb->s_blocksize) - continue; - if (ext4_fc_replay_check_excluded(sb, - ext4_group_first_block_no(sb, group) + i)) - continue; - break; + if (i < max) + break; } - if (group >= ext4_get_groups_count(sb) && i >= sb->s_blocksize) + if (group >= ext4_get_groups_count(sb) || i >= max) { + *errp = -ENOSPC; return 0; + } block = ext4_group_first_block_no(sb, group) + i; ext4_mb_mark_bb(sb, block, 1, 1); diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index ff8916e1d38e9195e399cd825f3e2705873429a8..7a5353a8cfd7b2005e676b6b67939a7bbfb12ece 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -485,7 +485,7 @@ int ext4_ext_migrate(struct inode *inode) * when we add extents we extent the journal */ /* - * Even though we take i_mutex we can still cause block + * Even though we take i_rwsem we can still cause block * allocation via mmap write to holes. If we have allocated * new blocks we fail migrate. New block allocation will * clear EXT4_STATE_EXT_MIGRATE flag. The flag is updated diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 52c9bd154122a6aa86fd1272b99fb659eba99aa7..8cf0a924a49bfc1f4cf79276307141941e31b388 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1317,7 +1317,7 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block) dx_set_count(entries, count + 1); } -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* * Test whether a case-insensitive directory entry matches the filename * being searched for. 
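The #ifdef CONFIG_UNICODE to #if IS_ENABLED(CONFIG_UNICODE) conversions repeated throughout this series are what allow UTF-8 support to be built as a module: Kconfig emits CONFIG_UNICODE for =y but CONFIG_UNICODE_MODULE for =m, so a bare #ifdef silently compiles the casefolding paths out in the modular case. A self-contained demonstration of the preprocessor trick, condensed from include/linux/kconfig.h:

#include <stdio.h>

#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
#define __is_defined(x) ___is_defined(x)
/* true for CONFIG_FOO=y (FOO defined) and CONFIG_FOO=m (FOO_MODULE defined) */
#define IS_ENABLED(option) (__is_defined(option) || __is_defined(option##_MODULE))

#define CONFIG_UNICODE_MODULE 1		/* pretend UTF-8 support is =m */

int main(void)
{
#ifdef CONFIG_UNICODE
	puts("#ifdef sees it");		/* not printed: the =m case is missed */
#endif
#if IS_ENABLED(CONFIG_UNICODE)
	puts("IS_ENABLED sees it");	/* printed */
#endif
	return 0;
}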
If quick is set, assume the name being looked up @@ -1428,7 +1428,7 @@ static bool ext4_match(struct inode *parent, f.crypto_buf = fname->crypto_buf; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent) && (!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) { if (fname->cf_name.name) { @@ -1800,7 +1800,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi } } -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (!inode && IS_CASEFOLDED(dir)) { /* Eventually we want to call d_add_ci(dentry, NULL) * for negative dentries in the encoding case as @@ -2308,7 +2308,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry, if (fscrypt_is_nokey_name(dentry)) return -ENOKEY; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) && sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name)) return -EINVAL; @@ -3126,7 +3126,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry) ext4_fc_track_unlink(handle, dentry); retval = ext4_mark_inode_dirty(handle, dir); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the @@ -3231,7 +3231,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) retval = __ext4_unlink(handle, dir, &dentry->d_name, d_inode(dentry)); if (!retval) ext4_fc_track_unlink(handle, dentry); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. Eventually we'll want avoid * invalidating the dentries here, alongside with returning the @@ -3889,7 +3889,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir, * dirents in directories. */ ext4_fc_mark_ineligible(old.inode->i_sb, - EXT4_FC_REASON_RENAME_DIR); + EXT4_FC_REASON_RENAME_DIR, handle); } else { if (new.inode) ext4_fc_track_unlink(handle, new.dentry); @@ -4049,7 +4049,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (unlikely(retval)) goto end_rename; ext4_fc_mark_ineligible(new.inode->i_sb, - EXT4_FC_REASON_CROSS_RENAME); + EXT4_FC_REASON_CROSS_RENAME, handle); if (old.dir_bh) { retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); if (retval) diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c index 53adc8f570a3f25fa83d9bb87f976b6c329d58e5..7de0612eb42d81a7ec39ec00693aaec925750846 100644 --- a/fs/ext4/orphan.c +++ b/fs/ext4/orphan.c @@ -93,7 +93,7 @@ static int ext4_orphan_file_add(handle_t *handle, struct inode *inode) * At filesystem recovery time, we walk this list deleting unlinked * inodes and truncating linked inodes in ext4_orphan_cleanup(). * - * Orphan list manipulation functions must be called under i_mutex unless + * Orphan list manipulation functions must be called under i_rwsem unless * we are just creating the inode or deleting it. */ int ext4_orphan_add(handle_t *handle, struct inode *inode) @@ -119,7 +119,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) /* * Orphan handling is only valid for files with data blocks * being truncated, or files being unlinked. 
Note that we either - * hold i_mutex, or the inode can not be referenced from outside, + * hold i_rwsem, or the inode can not be referenced from outside, * so i_nlink should not be bumped due to race */ ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 3db9234035053ed976030abc2152acf9c3aac84a..4cd62f1d848c86c3a1437c560991dbc1d8d0490d 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -43,7 +43,6 @@ #include #include #include -#include #include "ext4.h" @@ -350,11 +349,6 @@ int ext4_mpage_readpages(struct inode *inode, } else if (fully_mapped) { SetPageMappedToDisk(page); } - if (fully_mapped && blocks_per_page == 1 && - !PageUptodate(page) && cleancache_get_page(page) == 0) { - SetPageUptodate(page); - goto confused; - } /* * This page will go to BIO. Do we need to send this diff --git a/fs/ext4/super.c b/fs/ext4/super.c index db9fe48435293d40f5e1c9997ed126526a32e41e..c5021ca0a28ad52d99d1f3ee19c490e390c2fe35 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -1302,7 +1301,7 @@ static void ext4_put_super(struct super_block *sb) kfree(sbi->s_blockgroup_lock); fs_put_dax(sbi->s_daxdev); fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif kfree(sbi); @@ -1962,7 +1961,7 @@ static const struct mount_opts { {Opt_err, 0, 0} }; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) static const struct ext4_sb_encodings { __u16 magic; char *name; @@ -3149,8 +3148,6 @@ done: EXT4_BLOCKS_PER_GROUP(sb), EXT4_INODES_PER_GROUP(sb), sbi->s_mount_opt, sbi->s_mount_opt2); - - cleancache_init_fs(sb); return err; } @@ -3609,7 +3606,7 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } -#ifndef CONFIG_UNICODE +#if !IS_ENABLED(CONFIG_UNICODE) if (ext4_has_feature_casefold(sb)) { ext4_msg(sb, KERN_ERR, "Filesystem with casefold feature cannot be " @@ -4613,7 +4610,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (err < 0) goto failed_mount; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (ext4_has_feature_casefold(sb) && !sb->s_encoding) { const struct ext4_sb_encodings *encoding_info; struct unicode_map *encoding; @@ -5085,7 +5082,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]); sbi->s_fc_bytes = 0; ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); - ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); + sbi->s_fc_ineligible_tid = 0; spin_lock_init(&sbi->s_fc_lock); memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats)); sbi->s_fc_replay_state.fc_regions = NULL; @@ -5517,7 +5514,7 @@ failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif @@ -5543,7 +5540,7 @@ static int ext4_fill_super(struct super_block *sb, struct fs_context *fc) sbi = ext4_alloc_sbi(sb); if (!sbi) - ret = -ENOMEM; + return -ENOMEM; fc->s_fs_info = sbi; diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index f61e65ae27d8d78311a188fb3265ddceae7dc43a..d233c24ea34258d3f97d4c1fe6912cd48fc8fbf5 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -309,7 +309,7 @@ EXT4_ATTR_FEATURE(meta_bg_resize); EXT4_ATTR_FEATURE(encryption); EXT4_ATTR_FEATURE(test_dummy_encryption_v2); #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) 
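Buried among the super.c hunks above is a one-line bug fix in ext4_fill_super(): on allocation failure the old code recorded -ENOMEM in ret but fell through and kept running with a NULL sbi. A stripped-down illustration of the two patterns (hypothetical names, not ext4 code):

#include <stdlib.h>

struct sbi { int dummy; };

static int fill_super_buggy(void)
{
	int ret = 0;
	struct sbi *sbi = malloc(sizeof(*sbi));

	if (!sbi)
		ret = -12;	/* -ENOMEM recorded, but execution continues... */

	sbi->dummy = 1;		/* ...and crashes here when sbi == NULL */
	free(sbi);
	return ret;
}

static int fill_super_fixed(void)
{
	struct sbi *sbi = malloc(sizeof(*sbi));

	if (!sbi)
		return -12;	/* bail out immediately, as the patch now does */

	sbi->dummy = 1;
	free(sbi);
	return 0;
}

int main(void)
{
	(void)fill_super_buggy;		/* shown for contrast only */
	return fill_super_fixed();
}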
EXT4_ATTR_FEATURE(casefold); #endif #ifdef CONFIG_FS_VERITY @@ -317,7 +317,7 @@ EXT4_ATTR_FEATURE(verity); #endif EXT4_ATTR_FEATURE(metadata_csum_seed); EXT4_ATTR_FEATURE(fast_commit); -#if defined(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) +#if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) EXT4_ATTR_FEATURE(encrypted_casefold); #endif @@ -329,7 +329,7 @@ static struct attribute *ext4_feat_attrs[] = { ATTR_LIST(encryption), ATTR_LIST(test_dummy_encryption_v2), #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) ATTR_LIST(casefold), #endif #ifdef CONFIG_FS_VERITY @@ -337,7 +337,7 @@ static struct attribute *ext4_feat_attrs[] = { #endif ATTR_LIST(metadata_csum_seed), ATTR_LIST(fast_commit), -#if defined(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) +#if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_FS_ENCRYPTION) ATTR_LIST(encrypted_casefold), #endif NULL, diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 1e0fc1ed845bf6b04283844281b1bc056b66f5f9..0423253490986fab93430748c52b61a620610715 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -2408,7 +2408,7 @@ retry_inode: if (IS_SYNC(inode)) ext4_handle_sync(handle); } - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle); cleanup: brelse(is.iloc.bh); @@ -2486,7 +2486,7 @@ retry: if (error == 0) error = error2; } - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, NULL); return error; } @@ -2920,7 +2920,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode, error); goto cleanup; } - ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR); + ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle); } error = 0; cleanup: diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 0a1d236212f852dff6cf7d18ccbc9b2e10808986..8c417864c66ae3b5abdfbe0138cda383e71f9684 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -2035,12 +2034,6 @@ got_it: block_nr = map->m_pblk + block_in_file - map->m_lblk; SetPageMappedToDisk(page); - if (!PageUptodate(page) && (!PageSwapCache(page) && - !cleancache_get_page(page))) { - SetPageUptodate(page); - goto confused; - } - if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, DATA_GENERIC_ENHANCE_READ)) { ret = -EFSCORRUPTED; @@ -2096,12 +2089,6 @@ submit_and_realloc: ClearPageError(page); *last_block_in_bio = block_nr; goto out; -confused: - if (bio) { - __submit_bio(F2FS_I_SB(inode), bio, DATA); - bio = NULL; - } - unlock_page(page); out: *bio_ret = bio; return ret; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 1820e9c106f7dd7f69608a7eb49c116a7be3e1e9..166f086233625167f041b150bcef43cc5751380b 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -16,7 +16,7 @@ #include "xattr.h" #include -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) extern struct kmem_cache *f2fs_cf_name_slab; #endif @@ -79,7 +79,7 @@ unsigned char f2fs_get_de_type(struct f2fs_dir_entry *de) int f2fs_init_casefolded_name(const struct inode *dir, struct f2fs_filename *fname) { -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) struct super_block *sb = dir->i_sb; if (IS_CASEFOLDED(dir)) { @@ -174,7 +174,7 @@ void f2fs_free_filename(struct f2fs_filename *fname) kfree(fname->crypto_buf.name); fname->crypto_buf.name = NULL; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (fname->cf_name.name) { kmem_cache_free(f2fs_cf_name_slab, 
fname->cf_name.name); fname->cf_name.name = NULL; @@ -208,7 +208,7 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir, return f2fs_find_target_dentry(&d, fname, max_slots); } -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* * Test whether a case-insensitive directory entry matches the filename * being searched for. @@ -266,7 +266,7 @@ static inline int f2fs_match_name(const struct inode *dir, { struct fscrypt_name f; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (fname->cf_name.name) { struct qstr cf = FSTR_TO_QSTR(&fname->cf_name); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index eb22fa91c2b26694aab5a602d3e245db04fe2219..68b44015514f51834b2c63b13bb37fb98d228bd3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -488,7 +488,7 @@ struct f2fs_filename { */ struct fscrypt_str crypto_buf; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* * For casefolded directories: the casefolded name, but it's left NULL * if the original name is not valid Unicode, if the directory is both diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c index e3beac546c63a96acb1c2e863624238d85299ab1..3cb1e7a24740f7e6b48961a8f21da75c4c0b046c 100644 --- a/fs/f2fs/hash.c +++ b/fs/f2fs/hash.c @@ -105,7 +105,7 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname) return; } -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (IS_CASEFOLDED(dir)) { /* * If the casefolded name is provided, hash it instead of the diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a728a0af9ce0c853427d014d10d89ea3e7a98311..5f213f05556dc62601ef722a3482587d7f7908e3 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -561,7 +561,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, goto out_iput; } out_splice: -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (!inode && IS_CASEFOLDED(dir)) { /* Eventually we want to call d_add_ci(dentry, NULL) * for negative dentries in the encoding case as @@ -622,7 +622,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) goto fail; } f2fs_delete_entry(de, page, dir, inode); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* VFS negative dentries are incompatible with Encoding and * Case-insensitiveness. 
Eventually we'll want avoid * invalidating the dentries here, alongside with returning the diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9683c80ff8c24403e40b1e757b800658bbaa480e..79773d322c4772f7e9fdcb9778ed846280eefc9e 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -46,7 +46,7 @@ static struct kmem_cache *fsync_entry_slab; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) extern struct kmem_cache *f2fs_cf_name_slab; #endif @@ -149,7 +149,7 @@ static int init_recovered_filename(const struct inode *dir, if (err) return err; f2fs_hash_filename(dir, fname); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* Case-sensitive match is fine for recovery */ kmem_cache_free(f2fs_cf_name_slab, fname->cf_name.name); fname->cf_name.name = NULL; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 575d3dc418d02d33be209c31730b5dc8063a1bf2..1dabc8244083d84d1337f34597bdd918e7805b03 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2555,8 +2555,8 @@ find_other_zone: secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); if (secno >= MAIN_SECS(sbi)) { if (dir == ALLOC_RIGHT) { - secno = find_next_zero_bit(free_i->free_secmap, - MAIN_SECS(sbi), 0); + secno = find_first_zero_bit(free_i->free_secmap, + MAIN_SECS(sbi)); f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi)); } else { go_left = 1; @@ -2571,8 +2571,8 @@ find_other_zone: left_start--; continue; } - left_start = find_next_zero_bit(free_i->free_secmap, - MAIN_SECS(sbi), 0); + left_start = find_first_zero_bit(free_i->free_secmap, + MAIN_SECS(sbi)); f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi)); break; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 76e6a3df9abace603e73bd50fe543f0f1146a586..baefd398ec1a384b017b04200edd8abe4d1e106f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -257,7 +257,7 @@ void f2fs_printk(struct f2fs_sb_info *sbi, const char *fmt, ...) 
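The f2fs segment.c hunk swaps find_next_zero_bit(map, size, 0) for find_first_zero_bit(map, size). The two are equivalent when the search starts at bit 0; the dedicated entry point just drops the offset bookkeeping and lets the bitmap library take its whole-word fast path. A userspace sketch of the equivalence (simplified byte-wise scan, not the kernel implementation):

#include <stddef.h>
#include <stdio.h>

static size_t find_next_zero_bit_u8(const unsigned char *map, size_t size,
				    size_t offset)
{
	for (size_t i = offset; i < size; i++)
		if (!(map[i / 8] & (1u << (i % 8))))
			return i;
	return size;
}

/* find_first_zero_bit(map, size) == find_next_zero_bit(map, size, 0) */
static size_t find_first_zero_bit_u8(const unsigned char *map, size_t size)
{
	return find_next_zero_bit_u8(map, size, 0);
}

int main(void)
{
	unsigned char map[2] = { 0xff, 0xfb };	/* bit 10 is the first zero */

	printf("%zu\n", find_first_zero_bit_u8(map, 16));	/* 10 */
	return 0;
}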
va_end(args); } -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) static const struct f2fs_sb_encodings { __u16 magic; char *name; @@ -1259,7 +1259,7 @@ default_check: return -EINVAL; } #endif -#ifndef CONFIG_UNICODE +#if !IS_ENABLED(CONFIG_UNICODE) if (f2fs_sb_has_casefold(sbi)) { f2fs_err(sbi, "Filesystem with casefold feature cannot be mounted without CONFIG_UNICODE"); @@ -1619,7 +1619,7 @@ static void f2fs_put_super(struct super_block *sb) f2fs_destroy_iostat(sbi); for (i = 0; i < NR_PAGE_TYPE; i++) kvfree(sbi->write_io[i]); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif kfree(sbi); @@ -3903,7 +3903,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) static int f2fs_setup_casefold(struct f2fs_sb_info *sbi) { -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (f2fs_sb_has_casefold(sbi) && !sbi->sb->s_encoding) { const struct f2fs_sb_encodings *encoding_info; struct unicode_map *encoding; @@ -4458,7 +4458,7 @@ free_bio_info: for (i = 0; i < NR_PAGE_TYPE; i++) kvfree(sbi->write_io[i]); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); sb->s_encoding = NULL; #endif diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index df406c16b2ebdecb902257628e4298e68092ffe5..8ac50667124540eaa2e7ece13b26ec46114a83d0 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -201,7 +201,7 @@ static ssize_t unusable_show(struct f2fs_attr *a, static ssize_t encoding_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) struct super_block *sb = sbi->sb; if (f2fs_sb_has_casefold(sbi)) @@ -778,7 +778,7 @@ F2FS_GENERAL_RO_ATTR(avg_vblocks); #ifdef CONFIG_FS_ENCRYPTION F2FS_FEATURE_RO_ATTR(encryption); F2FS_FEATURE_RO_ATTR(test_dummy_encryption_v2); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) F2FS_FEATURE_RO_ATTR(encrypted_casefold); #endif #endif /* CONFIG_FS_ENCRYPTION */ @@ -797,7 +797,7 @@ F2FS_FEATURE_RO_ATTR(lost_found); F2FS_FEATURE_RO_ATTR(verity); #endif F2FS_FEATURE_RO_ATTR(sb_checksum); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) F2FS_FEATURE_RO_ATTR(casefold); #endif F2FS_FEATURE_RO_ATTR(readonly); @@ -910,7 +910,7 @@ static struct attribute *f2fs_feat_attrs[] = { #ifdef CONFIG_FS_ENCRYPTION ATTR_LIST(encryption), ATTR_LIST(test_dummy_encryption_v2), -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) ATTR_LIST(encrypted_casefold), #endif #endif /* CONFIG_FS_ENCRYPTION */ @@ -929,7 +929,7 @@ static struct attribute *f2fs_feat_attrs[] = { ATTR_LIST(verity), #endif ATTR_LIST(sb_checksum), -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) ATTR_LIST(casefold), #endif ATTR_LIST(readonly), diff --git a/fs/file_table.c b/fs/file_table.c index 45437f8e1003e6d80b6775efa4f9bf15995b1fd6..4969021fa6764b3872016a63bac195ff46e6a9d5 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -33,7 +33,7 @@ #include "internal.h" /* sysctl tunables... 
*/ -struct files_stat_struct files_stat = { +static struct files_stat_struct files_stat = { .max_files = NR_FILE }; @@ -75,22 +75,55 @@ unsigned long get_max_files(void) } EXPORT_SYMBOL_GPL(get_max_files); +#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) + /* * Handle nr_files sysctl */ -#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) -int proc_nr_files(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) +static int proc_nr_files(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { files_stat.nr_files = get_nr_files(); return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } -#else -int proc_nr_files(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) + +static struct ctl_table fs_stat_sysctls[] = { + { + .procname = "file-nr", + .data = &files_stat, + .maxlen = sizeof(files_stat), + .mode = 0444, + .proc_handler = proc_nr_files, + }, + { + .procname = "file-max", + .data = &files_stat.max_files, + .maxlen = sizeof(files_stat.max_files), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + .extra1 = SYSCTL_LONG_ZERO, + .extra2 = SYSCTL_LONG_MAX, + }, + { + .procname = "nr_open", + .data = &sysctl_nr_open, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &sysctl_nr_open_min, + .extra2 = &sysctl_nr_open_max, + }, + { } +}; + +static int __init init_fs_stat_sysctls(void) { - return -ENOSYS; + register_sysctl_init("fs", fs_stat_sysctls); + if (IS_ENABLED(CONFIG_BINFMT_MISC)) + register_sysctl_mount_point("fs/binfmt_misc"); + return 0; } +fs_initcall(init_fs_stat_sysctls); #endif static struct file *__alloc_file(int flags, const struct cred *cred) diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c index a57c6cbee858a2bfeeb87738e6268631a724c738..f2aa7dbad76674eab8dc2f12dec1cfee9c3e35af 100644 --- a/fs/fscache/volume.c +++ b/fs/fscache/volume.c @@ -142,12 +142,12 @@ static void fscache_wait_on_volume_collision(struct fscache_volume *candidate, unsigned int collidee_debug_id) { wait_var_event_timeout(&candidate->flags, - fscache_is_acquire_pending(candidate), 20 * HZ); + !fscache_is_acquire_pending(candidate), 20 * HZ); if (!fscache_is_acquire_pending(candidate)) { pr_notice("Potential volume collision new=%08x old=%08x", candidate->debug_id, collidee_debug_id); fscache_stat(&fscache_n_volumes_collision); - wait_var_event(&candidate->flags, fscache_is_acquire_pending(candidate)); + wait_var_event(&candidate->flags, !fscache_is_acquire_pending(candidate)); } } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 3e718cfc19a793eb29225eb685cc17b00f60d6c0..8c39a8571b1fa9bf4db49405dfcbdd73184181bb 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -704,10 +704,11 @@ static int gfs2_release(struct inode *inode, struct file *file) kfree(file->private_data); file->private_data = NULL; - if (gfs2_rs_active(&ip->i_res)) - gfs2_rs_delete(ip, &inode->i_writecount); - if (file->f_mode & FMODE_WRITE) + if (file->f_mode & FMODE_WRITE) { + if (gfs2_rs_active(&ip->i_res)) + gfs2_rs_delete(ip, &inode->i_writecount); gfs2_qa_put(ip); + } return 0; } diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index b7ab8430333c2c1e429a158dd82d8a69c0de679a..6b23399eaee0cbcb6ea340a6741faf86957e4580 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -301,9 +301,6 @@ void gfs2_glock_queue_put(struct gfs2_glock *gl) void gfs2_glock_put(struct gfs2_glock *gl) { - /* last put could call sleepable dlm api */ - might_sleep(); - if (lockref_put_or_lock(&gl->gl_lockref)) 
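The fscache/volume.c hunk above fixes an inverted wait condition: wait_var_event() sleeps until its predicate becomes true, so waiting on fscache_is_acquire_pending() was already satisfied while the flag was still set; the intent was to wait until it clears. A runnable pthread analogue of the corrected "wait until the flag is cleared" logic (illustrative names only):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static bool acquire_pending = true;

/* Analogue of wait_var_event(&flags, !fscache_is_acquire_pending()):
 * sleep until the predicate is true, i.e. until the flag is CLEARED.
 * The pre-fix code used the un-negated predicate, so it never waited. */
static void wait_until_not_pending(void)
{
	pthread_mutex_lock(&lock);
	while (acquire_pending)		/* predicate still false: keep sleeping */
		pthread_cond_wait(&cond, &lock);
	pthread_mutex_unlock(&lock);
}

static void *clearer(void *arg)
{
	(void)arg;
	sleep(1);
	pthread_mutex_lock(&lock);
	acquire_pending = false;	/* like clearing the pending bit... */
	pthread_cond_broadcast(&cond);	/* ...and waking the waiters */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, clearer, NULL);
	wait_until_not_pending();
	puts("acquire no longer pending");
	pthread_join(&t, NULL);
	return 0;
}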
return; diff --git a/fs/inode.c b/fs/inode.c index 980e7b7a546077af6a79639889d85b0bcf95fc27..63324df6fa273426765c37e14f83ff2ecc93e1b8 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -67,11 +67,6 @@ const struct address_space_operations empty_aops = { }; EXPORT_SYMBOL(empty_aops); -/* - * Statistics gathering.. - */ -struct inodes_stat_t inodes_stat; - static DEFINE_PER_CPU(unsigned long, nr_inodes); static DEFINE_PER_CPU(unsigned long, nr_unused); @@ -106,13 +101,43 @@ long get_nr_dirty_inodes(void) * Handle nr_inode sysctl */ #ifdef CONFIG_SYSCTL -int proc_nr_inodes(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) +/* + * Statistics gathering.. + */ +static struct inodes_stat_t inodes_stat; + +static int proc_nr_inodes(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { inodes_stat.nr_inodes = get_nr_inodes(); inodes_stat.nr_unused = get_nr_inodes_unused(); return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } + +static struct ctl_table inodes_sysctls[] = { + { + .procname = "inode-nr", + .data = &inodes_stat, + .maxlen = 2*sizeof(long), + .mode = 0444, + .proc_handler = proc_nr_inodes, + }, + { + .procname = "inode-state", + .data = &inodes_stat, + .maxlen = 7*sizeof(long), + .mode = 0444, + .proc_handler = proc_nr_inodes, + }, + { } +}; + +static int __init init_fs_inode_sysctls(void) +{ + register_sysctl_init("fs", inodes_sysctls); + return 0; +} +early_initcall(init_fs_inode_sysctls); #endif static int no_open(struct inode *inode, struct file *file) diff --git a/fs/io_uring.c b/fs/io_uring.c index e54c4127422e42ff34aa02367ad788f13d5f34a4..77b9c7e4793bf3332a0229ed74603d195b761756 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5228,7 +5228,6 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) min_ret = iov_iter_count(&msg.msg_iter); ret = sock_recvmsg(sock, &msg, flags); -out_free: if (ret < min_ret) { if (ret == -EAGAIN && force_nonblock) return -EAGAIN; @@ -5236,9 +5235,9 @@ out_free: ret = -EINTR; req_set_fail(req); } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { +out_free: req_set_fail(req); } - __io_req_complete(req, issue_flags, ret, io_put_kbuf(req)); return 0; } @@ -7822,10 +7821,15 @@ static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref) struct io_ring_ctx *ctx = node->rsrc_data->ctx; unsigned long flags; bool first_add = false; + unsigned long delay = HZ; spin_lock_irqsave(&ctx->rsrc_ref_lock, flags); node->done = true; + /* if we are mid-quiesce then do not delay */ + if (node->rsrc_data->quiesce) + delay = 0; + while (!list_empty(&ctx->rsrc_ref_list)) { node = list_first_entry(&ctx->rsrc_ref_list, struct io_rsrc_node, node); @@ -7838,10 +7842,10 @@ static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref) spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags); if (first_add) - mod_delayed_work(system_wq, &ctx->rsrc_put_work, HZ); + mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay); } -static struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx) +static struct io_rsrc_node *io_rsrc_node_alloc(void) { struct io_rsrc_node *ref_node; @@ -7892,7 +7896,7 @@ static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx) { if (ctx->rsrc_backup_node) return 0; - ctx->rsrc_backup_node = io_rsrc_node_alloc(ctx); + ctx->rsrc_backup_node = io_rsrc_node_alloc(); return ctx->rsrc_backup_node ? 
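fs/inode.c here follows the same conversion as fs/file_table.c above (and locks.c, namei.c and namespace.c further down): the tunable becomes static, the ctl_table moves next to the code it controls, and an initcall registers it instead of the old central sysctl table. A condensed kernel-style sketch of the pattern, with a hypothetical knob (not compilable outside a kernel tree):

static int example_knob = 1;		/* hypothetical tunable, now file-local */

static struct ctl_table example_sysctls[] = {
	{
		.procname	= "example-knob",
		.data		= &example_knob,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }				/* table must stay empty-terminated */
};

static int __init init_example_sysctls(void)
{
	/* creates /proc/sys/fs/example-knob */
	register_sysctl_init("fs", example_sysctls);
	return 0;
}
fs_initcall(init_example_sysctls);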
0 : -ENOMEM; } @@ -8928,10 +8932,9 @@ static void io_mem_free(void *ptr) static void *io_mem_alloc(size_t size) { - gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP | - __GFP_NORETRY | __GFP_ACCOUNT; + gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP; - return (void *) __get_free_pages(gfp_flags, get_order(size)); + return (void *) __get_free_pages(gfp, get_order(size)); } static unsigned long rings_size(unsigned sq_entries, unsigned cq_entries, diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index c938bbad075e1ff72631ff4e93728d395f237126..6c51a75d0be612b71f755a15118556b865eefa94 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -21,6 +21,8 @@ #include "../internal.h" +#define IOEND_BATCH_SIZE 4096 + /* * Structure allocated for each folio when block size < folio size * to track sub-folio uptodate status and I/O completions. @@ -1039,7 +1041,7 @@ static void iomap_finish_folio_write(struct inode *inode, struct folio *folio, * state, release holds on bios, and finally free up memory. Do not use the * ioend after this. */ -static void +static u32 iomap_finish_ioend(struct iomap_ioend *ioend, int error) { struct inode *inode = ioend->io_inode; @@ -1048,6 +1050,7 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error) u64 start = bio->bi_iter.bi_sector; loff_t offset = ioend->io_offset; bool quiet = bio_flagged(bio, BIO_QUIET); + u32 folio_count = 0; for (bio = &ioend->io_inline_bio; bio; bio = next) { struct folio_iter fi; @@ -1062,9 +1065,11 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error) next = bio->bi_private; /* walk all folios in bio, ending page IO on them */ - bio_for_each_folio_all(fi, bio) + bio_for_each_folio_all(fi, bio) { iomap_finish_folio_write(inode, fi.folio, fi.length, error); + folio_count++; + } bio_put(bio); } /* The ioend has been freed by bio_put() */ @@ -1074,20 +1079,36 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error) "%s: writeback error on inode %lu, offset %lld, sector %llu", inode->i_sb->s_id, inode->i_ino, offset, start); } + return folio_count; } +/* + * Ioend completion routine for merged bios. This can only be called from task + * contexts as merged ioends can be of unbound length. Hence we have to break up + * the writeback completions into manageable chunks to avoid long scheduler + * holdoffs. We aim to keep scheduler holdoffs down below 10ms so that we get + * good batch processing throughput without creating adverse scheduler latency + * conditions. + */ void iomap_finish_ioends(struct iomap_ioend *ioend, int error) { struct list_head tmp; + u32 completions; + + might_sleep(); list_replace_init(&ioend->io_list, &tmp); - iomap_finish_ioend(ioend, error); + completions = iomap_finish_ioend(ioend, error); while (!list_empty(&tmp)) { + if (completions > IOEND_BATCH_SIZE * 8) { + cond_resched(); + completions = 0; + } ioend = list_first_entry(&tmp, struct iomap_ioend, io_list); list_del_init(&ioend->io_list); - iomap_finish_ioend(ioend, error); + completions += iomap_finish_ioend(ioend, error); } } EXPORT_SYMBOL_GPL(iomap_finish_ioends); @@ -1108,6 +1129,18 @@ iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next) return false; if (ioend->io_offset + ioend->io_size != next->io_offset) return false; + /* + * Do not merge physically discontiguous ioends. 
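The iomap_finish_ioends() change above is a latency fix: merged ioends can be arbitrarily long, so the completion path now counts finished folios and drops a cond_resched() after every IOEND_BATCH_SIZE * 8 of them to bound scheduler holdoffs. The general shape of the pattern, as a kernel-style sketch (struct item and finish_one() are made up):

#define BATCH_LIMIT	(4096 * 8)	/* mirrors IOEND_BATCH_SIZE * 8 */

static void finish_all(struct list_head *list)
{
	u32 completions = 0;

	might_sleep();			/* cond_resched() may block */

	while (!list_empty(list)) {
		struct item *it = list_first_entry(list, struct item, link);

		list_del_init(&it->link);
		completions += finish_one(it);	/* units of work completed */

		if (completions > BATCH_LIMIT) {
			cond_resched();		/* cap the holdoff, then go again */
			completions = 0;
		}
	}
}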
The filesystem + * completion functions will have to iterate the physical + * discontiguities even if we merge the ioends at a logical level, so + * we don't gain anything by merging physical discontiguities here. + * + * We cannot use bio->bi_iter.bi_sector here as it is modified during + * submission so does not point to the start sector of the bio at + * completion. + */ + if (ioend->io_sector + (ioend->io_size >> 9) != next->io_sector) + return false; return true; } @@ -1209,8 +1242,10 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc, ioend->io_flags = wpc->iomap.flags; ioend->io_inode = inode; ioend->io_size = 0; + ioend->io_folios = 0; ioend->io_offset = offset; ioend->io_bio = bio; + ioend->io_sector = sector; return ioend; } @@ -1251,6 +1286,13 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset, return false; if (sector != bio_end_sector(wpc->ioend->io_bio)) return false; + /* + * Limit ioend bio chain lengths to minimise IO completion latency. This + * also prevents long tight loops ending page writeback on all the + * folios in the ioend. + */ + if (wpc->ioend->io_folios >= IOEND_BATCH_SIZE) + return false; return true; } @@ -1335,6 +1377,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc, &submit_list); count++; } + if (count) + wpc->ioend->io_folios++; WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list)); WARN_ON_ONCE(!folio_test_locked(folio)); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 3cc4ab2ba7f4f25a90ae1528a49cd6928455c937..5b9408e3b370d00d995e82934ad9eb96719c9b8b 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -484,22 +484,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start, stats.run.rs_locked); - spin_lock(&commit_transaction->t_handle_lock); - while (atomic_read(&commit_transaction->t_updates)) { - DEFINE_WAIT(wait); + // waits for any t_updates to finish + jbd2_journal_wait_updates(journal); - prepare_to_wait(&journal->j_wait_updates, &wait, - TASK_UNINTERRUPTIBLE); - if (atomic_read(&commit_transaction->t_updates)) { - spin_unlock(&commit_transaction->t_handle_lock); - write_unlock(&journal->j_state_lock); - schedule(); - write_lock(&journal->j_state_lock); - spin_lock(&commit_transaction->t_handle_lock); - } - finish_wait(&journal->j_wait_updates, &wait); - } - spin_unlock(&commit_transaction->t_handle_lock); commit_transaction->t_state = T_SWITCH; write_unlock(&journal->j_state_lock); @@ -817,7 +804,7 @@ start_journal_io: commit_transaction->t_state = T_COMMIT_DFLUSH; write_unlock(&journal->j_state_lock); - /* + /* * If the journal is not located on the file system device, * then we must flush the file system device before we issue * the commit record @@ -1170,7 +1157,7 @@ restart_loop: if (journal->j_commit_callback) journal->j_commit_callback(journal, commit_transaction); if (journal->j_fc_cleanup_callback) - journal->j_fc_cleanup_callback(journal, 1); + journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid); trace_jbd2_end_commit(journal, commit_transaction); jbd_debug(1, "JBD2: commit %d complete, head %d\n", diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0b86a4365b669e14b1bc0b3b60aedb4c6b92e0ba..c2cf74b01ddb07b5fe9d9d6d919f69704ffbef41 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -771,7 +771,7 @@ static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback) { jbd2_journal_unlock_updates(journal); if (journal->j_fc_cleanup_callback) - 
journal->j_fc_cleanup_callback(journal, 0); + journal->j_fc_cleanup_callback(journal, 0, tid); write_lock(&journal->j_state_lock); journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING; if (fallback) @@ -1212,7 +1212,7 @@ static const struct seq_operations jbd2_seq_info_ops = { static int jbd2_seq_info_open(struct inode *inode, struct file *file) { - journal_t *journal = PDE_DATA(inode); + journal_t *journal = pde_data(inode); struct jbd2_stats_proc_session *s; int rc, size; @@ -1287,6 +1287,8 @@ static int jbd2_min_tag_size(void) /** * jbd2_journal_shrink_scan() + * @shrink: shrinker to work on + * @sc: reclaim request to process * * Scan the checkpointed buffer on the checkpoint list and release the * journal_head. @@ -1312,6 +1314,8 @@ static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink, /** * jbd2_journal_shrink_count() + * @shrink: shrinker to work on + * @sc: reclaim request to process * * Count the number of checkpoint buffers on the checkpoint list. */ @@ -2972,6 +2976,7 @@ struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh) jbd_unlock_bh_journal_head(bh); return jh; } +EXPORT_SYMBOL(jbd2_journal_grab_journal_head); static void __journal_remove_journal_head(struct buffer_head *bh) { @@ -3024,6 +3029,7 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) jbd_unlock_bh_journal_head(bh); } } +EXPORT_SYMBOL(jbd2_journal_put_journal_head); /* * Initialize jbd inode head diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 6a3caedd22856feb650a084bfe0ef08bec838284..8e2f8275a2535a294f677aaa84338b210ea61277 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -449,7 +449,7 @@ repeat: } /* OK, account for the buffers that this operation expects to - * use and add the handle to the running transaction. + * use and add the handle to the running transaction. */ update_t_max_wait(transaction, ts); handle->h_transaction = transaction; @@ -836,6 +836,35 @@ int jbd2_journal_restart(handle_t *handle, int nblocks) } EXPORT_SYMBOL(jbd2_journal_restart); +/* + * Waits for any outstanding t_updates to finish. + * This is called with write j_state_lock held. + */ +void jbd2_journal_wait_updates(journal_t *journal) +{ + transaction_t *commit_transaction = journal->j_running_transaction; + + if (!commit_transaction) + return; + + spin_lock(&commit_transaction->t_handle_lock); + while (atomic_read(&commit_transaction->t_updates)) { + DEFINE_WAIT(wait); + + prepare_to_wait(&journal->j_wait_updates, &wait, + TASK_UNINTERRUPTIBLE); + if (atomic_read(&commit_transaction->t_updates)) { + spin_unlock(&commit_transaction->t_handle_lock); + write_unlock(&journal->j_state_lock); + schedule(); + write_lock(&journal->j_state_lock); + spin_lock(&commit_transaction->t_handle_lock); + } + finish_wait(&journal->j_wait_updates, &wait); + } + spin_unlock(&commit_transaction->t_handle_lock); +} + /** * jbd2_journal_lock_updates () - establish a transaction barrier. * @journal: Journal to establish a barrier on. 
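The new jbd2_journal_wait_updates() helper above deduplicates the open-coded wait loops in commit.c and jbd2_journal_lock_updates(). The loop is the canonical waitqueue idiom: publish the waiter, re-check the condition, drop every lock before schedule(), retake afterwards. A skeleton of the idiom (waitq, pending and the lock helpers are placeholders for j_wait_updates, t_updates and the jbd2 locks):

static void wait_for_updates(void)
{
	while (atomic_read(&pending)) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&waitq, &wait, TASK_UNINTERRUPTIBLE);
		if (atomic_read(&pending)) {	/* re-check: no lost wakeups */
			unlock_state();		/* never schedule() holding locks */
			schedule();
			relock_state();
		}
		finish_wait(&waitq, &wait);
	}
}

The second atomic_read() after prepare_to_wait() is what closes the race: a wakeup landing between the first test and the sleep is not lost, because the waiter is already on the queue when the condition is re-tested.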
@@ -863,27 +892,9 @@ void jbd2_journal_lock_updates(journal_t *journal) write_lock(&journal->j_state_lock); } - /* Wait until there are no running updates */ - while (1) { - transaction_t *transaction = journal->j_running_transaction; - - if (!transaction) - break; + /* Wait until there are no running t_updates */ + jbd2_journal_wait_updates(journal); - spin_lock(&transaction->t_handle_lock); - prepare_to_wait(&journal->j_wait_updates, &wait, - TASK_UNINTERRUPTIBLE); - if (!atomic_read(&transaction->t_updates)) { - spin_unlock(&transaction->t_handle_lock); - finish_wait(&journal->j_wait_updates, &wait); - break; - } - spin_unlock(&transaction->t_handle_lock); - write_unlock(&journal->j_state_lock); - schedule(); - finish_wait(&journal->j_wait_updates, &wait); - write_lock(&journal->j_state_lock); - } write_unlock(&journal->j_state_lock); /* diff --git a/fs/ksmbd/auth.c b/fs/ksmbd/auth.c index dc3d061edda9285e16cc963557f811e975343887..911444d212673bf5ae2d639ab246577e1cd33a63 100644 --- a/fs/ksmbd/auth.c +++ b/fs/ksmbd/auth.c @@ -29,6 +29,7 @@ #include "mgmt/user_config.h" #include "crypto_ctx.h" #include "transport_ipc.h" +#include "../smbfs_common/arc4.h" /* * Fixed format data defining GSS header and fixed string @@ -336,6 +337,29 @@ int ksmbd_decode_ntlmssp_auth_blob(struct authenticate_message *authblob, nt_len - CIFS_ENCPWD_SIZE, domain_name, conn->ntlmssp.cryptkey); kfree(domain_name); + + /* The recovered secondary session key */ + if (conn->ntlmssp.client_flags & NTLMSSP_NEGOTIATE_KEY_XCH) { + struct arc4_ctx *ctx_arc4; + unsigned int sess_key_off, sess_key_len; + + sess_key_off = le32_to_cpu(authblob->SessionKey.BufferOffset); + sess_key_len = le16_to_cpu(authblob->SessionKey.Length); + + if (blob_len < (u64)sess_key_off + sess_key_len) + return -EINVAL; + + ctx_arc4 = kmalloc(sizeof(*ctx_arc4), GFP_KERNEL); + if (!ctx_arc4) + return -ENOMEM; + + cifs_arc4_setkey(ctx_arc4, sess->sess_key, + SMB2_NTLMV2_SESSKEY_SIZE); + cifs_arc4_crypt(ctx_arc4, sess->sess_key, + (char *)authblob + sess_key_off, sess_key_len); + kfree_sensitive(ctx_arc4); + } + return ret; } @@ -408,6 +432,9 @@ ksmbd_build_ntlmssp_challenge_blob(struct challenge_message *chgblob, (cflags & NTLMSSP_NEGOTIATE_EXTENDED_SEC)) flags |= NTLMSSP_NEGOTIATE_EXTENDED_SEC; + if (cflags & NTLMSSP_NEGOTIATE_KEY_XCH) + flags |= NTLMSSP_NEGOTIATE_KEY_XCH; + chgblob->NegotiateFlags = cpu_to_le32(flags); len = strlen(ksmbd_netbios_name()); name = kmalloc(2 + UNICODE_LEN(len), GFP_KERNEL); diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 1866c81c5c99f000896686242e650951fc0ff1fb..67e8e28e3fc3529648dc99b615fdae012afa3349 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -2688,7 +2688,7 @@ int smb2_open(struct ksmbd_work *work) (struct create_posix *)context; if (le16_to_cpu(context->DataOffset) + le32_to_cpu(context->DataLength) < - sizeof(struct create_posix)) { + sizeof(struct create_posix) - 4) { rc = -EINVAL; goto err_out1; } @@ -3422,9 +3422,9 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level, goto free_conv_name; } - struct_sz = readdir_info_level_struct_sz(info_level); - next_entry_offset = ALIGN(struct_sz - 1 + conv_len, - KSMBD_DIR_INFO_ALIGNMENT); + struct_sz = readdir_info_level_struct_sz(info_level) - 1 + conv_len; + next_entry_offset = ALIGN(struct_sz, KSMBD_DIR_INFO_ALIGNMENT); + d_info->last_entry_off_align = next_entry_offset - struct_sz; if (next_entry_offset > d_info->out_buf_len) { d_info->out_buf_len = 0; @@ -3976,6 +3976,7 @@ int smb2_query_dir(struct ksmbd_work 
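The ksmbd auth.c hunk above implements NTLMSSP key exchange: when the client sets NTLMSSP_NEGOTIATE_KEY_XCH it sends its session key RC4-encrypted under the NTLMv2 session base key, and the server recovers it by applying the identical keystream (the kernel uses the shared smbfs_common/arc4 helpers). A self-contained round-trip demonstration; RC4 is long obsolete and appears here only because the protocol mandates it:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct arc4_ctx { uint8_t S[256]; uint8_t i, j; };

static void arc4_setkey(struct arc4_ctx *c, const uint8_t *key, size_t klen)
{
	unsigned int i, j = 0;

	for (i = 0; i < 256; i++)
		c->S[i] = i;
	for (i = 0; i < 256; i++) {
		j = (j + c->S[i] + key[i % klen]) & 0xff;
		uint8_t t = c->S[i]; c->S[i] = c->S[j]; c->S[j] = t;
	}
	c->i = c->j = 0;
}

/* RC4 is symmetric: the same call encrypts and decrypts */
static void arc4_crypt(struct arc4_ctx *c, uint8_t *dst, const uint8_t *src,
		       size_t len)
{
	while (len--) {
		c->i = (c->i + 1) & 0xff;
		c->j = (c->j + c->S[c->i]) & 0xff;
		uint8_t t = c->S[c->i]; c->S[c->i] = c->S[c->j]; c->S[c->j] = t;
		*dst++ = *src++ ^ c->S[(c->S[c->i] + c->S[c->j]) & 0xff];
	}
}

int main(void)
{
	const uint8_t base_key[16] = "0123456789abcdef";
	uint8_t sess[16] = "SECRETSESSIONKEY", wire[16], back[16];
	struct arc4_ctx ctx;

	arc4_setkey(&ctx, base_key, sizeof(base_key));
	arc4_crypt(&ctx, wire, sess, sizeof(sess));	/* client wraps the key */

	arc4_setkey(&ctx, base_key, sizeof(base_key));
	arc4_crypt(&ctx, back, wire, sizeof(back));	/* server unwraps it */

	printf("round trip %s\n", memcmp(sess, back, 16) ? "failed" : "ok");
	return 0;
}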
*work) ((struct file_directory_info *) ((char *)rsp->Buffer + d_info.last_entry_offset)) ->NextEntryOffset = 0; + d_info.data_count -= d_info.last_entry_off_align; rsp->StructureSize = cpu_to_le16(9); rsp->OutputBufferOffset = cpu_to_le16(72); @@ -6126,13 +6127,26 @@ static int smb2_set_remote_key_for_rdma(struct ksmbd_work *work, __le16 ChannelInfoOffset, __le16 ChannelInfoLength) { + unsigned int i, ch_count; + if (work->conn->dialect == SMB30_PROT_ID && Channel != SMB2_CHANNEL_RDMA_V1) return -EINVAL; - if (ChannelInfoOffset == 0 || - le16_to_cpu(ChannelInfoLength) < sizeof(*desc)) + ch_count = le16_to_cpu(ChannelInfoLength) / sizeof(*desc); + if (ksmbd_debug_types & KSMBD_DEBUG_RDMA) { + for (i = 0; i < ch_count; i++) { + pr_info("RDMA r/w request %#x: token %#x, length %#x\n", + i, + le32_to_cpu(desc[i].token), + le32_to_cpu(desc[i].length)); + } + } + if (ch_count != 1) { + ksmbd_debug(RDMA, "RDMA multiple buffer descriptors %d are not supported yet\n", + ch_count); return -EINVAL; + } work->need_invalidate_rkey = (Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE); @@ -6185,9 +6199,15 @@ int smb2_read(struct ksmbd_work *work) if (req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE || req->Channel == SMB2_CHANNEL_RDMA_V1) { + unsigned int ch_offset = le16_to_cpu(req->ReadChannelInfoOffset); + + if (ch_offset < offsetof(struct smb2_read_req, Buffer)) { + err = -EINVAL; + goto out; + } err = smb2_set_remote_key_for_rdma(work, (struct smb2_buffer_desc_v1 *) - &req->Buffer[0], + ((char *)req + ch_offset), req->Channel, req->ReadChannelInfoOffset, req->ReadChannelInfoLength); @@ -6428,11 +6448,16 @@ int smb2_write(struct ksmbd_work *work) if (req->Channel == SMB2_CHANNEL_RDMA_V1 || req->Channel == SMB2_CHANNEL_RDMA_V1_INVALIDATE) { - if (req->Length != 0 || req->DataOffset != 0) - return -EINVAL; + unsigned int ch_offset = le16_to_cpu(req->WriteChannelInfoOffset); + + if (req->Length != 0 || req->DataOffset != 0 || + ch_offset < offsetof(struct smb2_write_req, Buffer)) { + err = -EINVAL; + goto out; + } err = smb2_set_remote_key_for_rdma(work, (struct smb2_buffer_desc_v1 *) - &req->Buffer[0], + ((char *)req + ch_offset), req->Channel, req->WriteChannelInfoOffset, req->WriteChannelInfoLength); diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c index ef7f42b0290a8f00b60264f3ef6a92f0c2d974be..9a7e211dbf4f4dbe315dae775941df8d226aebe1 100644 --- a/fs/ksmbd/smb_common.c +++ b/fs/ksmbd/smb_common.c @@ -308,14 +308,17 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level, for (i = 0; i < 2; i++) { struct kstat kstat; struct ksmbd_kstat ksmbd_kstat; + struct dentry *dentry; if (!dir->dot_dotdot[i]) { /* fill dot entry info */ if (i == 0) { d_info->name = "."; d_info->name_len = 1; + dentry = dir->filp->f_path.dentry; } else { d_info->name = ".."; d_info->name_len = 2; + dentry = dir->filp->f_path.dentry->d_parent; } if (!match_pattern(d_info->name, d_info->name_len, @@ -327,7 +330,7 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level, ksmbd_kstat.kstat = &kstat; ksmbd_vfs_fill_dentry_attrs(work, user_ns, - dir->filp->f_path.dentry->d_parent, + dentry, &ksmbd_kstat); rc = fn(conn, info_level, d_info, &ksmbd_kstat); if (rc) diff --git a/fs/ksmbd/transport_rdma.c b/fs/ksmbd/transport_rdma.c index 3c1ec1ac0b27041cd15c4433ca1282c1c47051c8..ba5a22bc2e6d8d31c286716d128ef4ecb8bc12f2 100644 --- a/fs/ksmbd/transport_rdma.c +++ b/fs/ksmbd/transport_rdma.c @@ -80,7 +80,7 @@ static int smb_direct_max_fragmented_recv_size = 1024 * 1024; /* The maximum 
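Both the auth.c check above (blob_len < (u64)sess_key_off + sess_key_len) and the smb2_read/smb2_write channel-offset checks validate client-controlled offset/length pairs before dereferencing into the request buffer. The widening cast is the important part: summing two 32-bit values in 32-bit arithmetic can wrap past the bound. A short demonstration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Client-controlled offset/length checked against a real buffer size.
 * Doing the sum in 64 bits keeps off + len from wrapping modulo 2^32. */
static bool range_ok(uint32_t blob_len, uint32_t off, uint32_t len)
{
	return (uint64_t)off + len <= blob_len;
}

int main(void)
{
	/* off + len wraps to 0 in 32-bit arithmetic: a naive check passes */
	uint32_t off = 0xffffffff, len = 1;

	printf("naive:  %s\n", off + len <= 100 ? "accepted" : "rejected");
	printf("64-bit: %s\n", range_ok(100, off, len) ? "accepted" : "rejected");
	return 0;
}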
single-message size which can be received */ static int smb_direct_max_receive_size = 8192; -static int smb_direct_max_read_write_size = 1048512; +static int smb_direct_max_read_write_size = 524224; static int smb_direct_max_outstanding_rw_ops = 8; diff --git a/fs/ksmbd/vfs.h b/fs/ksmbd/vfs.h index adf94a4f22fa6d21e058dfe29e2a01c7c5e3b8a2..8c37aaf936ab114c1c30c24224dc6b84a7c254e5 100644 --- a/fs/ksmbd/vfs.h +++ b/fs/ksmbd/vfs.h @@ -47,6 +47,7 @@ struct ksmbd_dir_info { int last_entry_offset; bool hide_dot_file; int flags; + int last_entry_off_align; }; struct ksmbd_readdir_data { diff --git a/fs/libfs.c b/fs/libfs.c index ba7438ab93710f9df4ed44609ecc6947eeb0cf0b..974125270a42820152bdac2d2d9cc79dac9de622 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1379,7 +1379,7 @@ bool is_empty_dir_inode(struct inode *inode) (inode->i_op == &empty_dir_inode_operations); } -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) /* * Determine if the name of a dentry should be casefolded. * @@ -1473,7 +1473,7 @@ static const struct dentry_operations generic_encrypted_dentry_ops = { }; #endif -#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE) +#if defined(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_UNICODE) static const struct dentry_operations generic_encrypted_ci_dentry_ops = { .d_hash = generic_ci_d_hash, .d_compare = generic_ci_d_compare, @@ -1508,10 +1508,10 @@ void generic_set_encrypted_ci_d_ops(struct dentry *dentry) #ifdef CONFIG_FS_ENCRYPTION bool needs_encrypt_ops = dentry->d_flags & DCACHE_NOKEY_NAME; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) bool needs_ci_ops = dentry->d_sb->s_encoding; #endif -#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE) +#if defined(CONFIG_FS_ENCRYPTION) && IS_ENABLED(CONFIG_UNICODE) if (needs_encrypt_ops && needs_ci_ops) { d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops); return; @@ -1523,7 +1523,7 @@ void generic_set_encrypted_ci_d_ops(struct dentry *dentry) return; } #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) if (needs_ci_ops) { d_set_d_op(dentry, &generic_ci_dentry_ops); return; diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index cb3a7512c33ec8435bf76f6291f48a2f1c562e4c..0a22a2faf552245716f88837ddd59e34bfea79a6 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -179,19 +179,21 @@ nlm_delete_file(struct nlm_file *file) static int nlm_unlock_files(struct nlm_file *file) { struct file_lock lock; - struct file *f; + locks_init_lock(&lock); lock.fl_type = F_UNLCK; lock.fl_start = 0; lock.fl_end = OFFSET_MAX; - for (f = file->f_file[0]; f <= file->f_file[1]; f++) { - if (f && vfs_lock_file(f, F_SETLK, &lock, NULL) < 0) { - pr_warn("lockd: unlock failure in %s:%d\n", - __FILE__, __LINE__); - return 1; - } - } + if (file->f_file[O_RDONLY] && + vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, &lock, NULL)) + goto out_err; + if (file->f_file[O_WRONLY] && + vfs_lock_file(file->f_file[O_WRONLY], F_SETLK, &lock, NULL)) + goto out_err; return 0; +out_err: + pr_warn("lockd: unlock failure in %s:%d\n", __FILE__, __LINE__); + return 1; } /* diff --git a/fs/locks.c b/fs/locks.c index 0fca9d6809781ed4fea5c72d9e6a8cfda79e406b..8c6df10cd9ed2e6300c03015a6e717b0c6dd999c 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -62,6 +62,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -88,8 +89,37 @@ static int target_leasetype(struct file_lock *fl) return fl->fl_type; } -int leases_enable = 1; -int lease_break_time = 45; +static int leases_enable = 1; +static int lease_break_time = 45; + +#ifdef 
CONFIG_SYSCTL +static struct ctl_table locks_sysctls[] = { + { + .procname = "leases-enable", + .data = &leases_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#ifdef CONFIG_MMU + { + .procname = "lease-break-time", + .data = &lease_break_time, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif /* CONFIG_MMU */ + {} +}; + +static int __init init_fs_locks_sysctls(void) +{ + register_sysctl_init("fs", locks_sysctls); + return 0; +} +early_initcall(init_fs_locks_sysctls); +#endif /* CONFIG_SYSCTL */ /* * The global file_lock_list is only used for displaying /proc/locks, so we diff --git a/fs/mpage.c b/fs/mpage.c index 334e7d09aa65275727d9f3db6df845bccb4b762b..87f5cfef6caa71c7333cca396563605700882f23 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -29,7 +29,6 @@ #include #include #include -#include #include "internal.h" /* @@ -284,12 +283,6 @@ static struct bio *do_mpage_readpage(struct mpage_readpage_args *args) SetPageMappedToDisk(page); } - if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) && - cleancache_get_page(page) == 0) { - SetPageUptodate(page); - goto confused; - } - /* * This page will go to BIO. Do we need to send this BIO off first? */ diff --git a/fs/namei.c b/fs/namei.c index d81f04f8d81880b84d34598948126a63e33319f2..3f1829b3ab5b7cda8e2a48071efd6ac23cff4bac 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1020,10 +1020,60 @@ static inline void put_link(struct nameidata *nd) path_put(&last->link); } -int sysctl_protected_symlinks __read_mostly = 0; -int sysctl_protected_hardlinks __read_mostly = 0; -int sysctl_protected_fifos __read_mostly; -int sysctl_protected_regular __read_mostly; +static int sysctl_protected_symlinks __read_mostly; +static int sysctl_protected_hardlinks __read_mostly; +static int sysctl_protected_fifos __read_mostly; +static int sysctl_protected_regular __read_mostly; + +#ifdef CONFIG_SYSCTL +static struct ctl_table namei_sysctls[] = { + { + .procname = "protected_symlinks", + .data = &sysctl_protected_symlinks, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "protected_hardlinks", + .data = &sysctl_protected_hardlinks, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "protected_fifos", + .data = &sysctl_protected_fifos, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, + }, + { + .procname = "protected_regular", + .data = &sysctl_protected_regular, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_TWO, + }, + { } +}; + +static int __init init_fs_namei_sysctls(void) +{ + register_sysctl_init("fs", namei_sysctls); + return 0; +} +fs_initcall(init_fs_namei_sysctls); + +#endif /* CONFIG_SYSCTL */ /** * may_follow_link - Check symlink following for unsafe situations @@ -3974,13 +4024,12 @@ int vfs_rmdir(struct user_namespace *mnt_userns, struct inode *dir, dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); detach_mounts(dentry); - fsnotify_rmdir(dir, dentry); out: inode_unlock(dentry->d_inode); dput(dentry); if (!error) - d_delete(dentry); + d_delete_notify(dir, dentry); return error; } EXPORT_SYMBOL(vfs_rmdir); @@ -4102,7 +4151,6 @@ int vfs_unlink(struct user_namespace *mnt_userns, struct inode *dir, if (!error) { 
dont_mount(dentry); detach_mounts(dentry); - fsnotify_unlink(dir, dentry); } } } @@ -4110,9 +4158,11 @@ out: inode_unlock(target); /* We don't d_delete() NFS sillyrenamed files--they still exist. */ - if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { + if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) { + fsnotify_unlink(dir, dentry); + } else if (!error) { fsnotify_link_count(target); - d_delete(dentry); + d_delete_notify(dir, dentry); } return error; diff --git a/fs/namespace.c b/fs/namespace.c index dc31ad6b370f39a88dd4ee6f5fafbcc02344b203..40b994a29e90df04ba89730e275e7e5db8ba8bce 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -37,7 +37,7 @@ #include "internal.h" /* Maximum number of mounts in a mount namespace */ -unsigned int sysctl_mount_max __read_mostly = 100000; +static unsigned int sysctl_mount_max __read_mostly = 100000; static unsigned int m_hash_mask __read_mostly; static unsigned int m_hash_shift __read_mostly; @@ -4620,3 +4620,25 @@ const struct proc_ns_operations mntns_operations = { .install = mntns_install, .owner = mntns_owner, }; + +#ifdef CONFIG_SYSCTL +static struct ctl_table fs_namespace_sysctls[] = { + { + .procname = "mount-max", + .data = &sysctl_mount_max, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + }, + { } +}; + +static int __init init_fs_namespace_sysctls(void) +{ + register_sysctl_init("fs", fs_namespace_sysctls); + return 0; +} +fs_initcall(init_fs_namespace_sysctls); + +#endif /* CONFIG_SYSCTL */ diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c index 6169659857b30391d7e76b732607c923337045d7..501da990c25905a55fd325f2167db89d4e0fb382 100644 --- a/fs/netfs/read_helper.c +++ b/fs/netfs/read_helper.c @@ -55,7 +55,8 @@ static struct netfs_read_request *netfs_alloc_read_request( INIT_WORK(&rreq->work, netfs_rreq_work); refcount_set(&rreq->usage, 1); __set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); - ops->init_rreq(rreq, file); + if (ops->init_rreq) + ops->init_rreq(rreq, file); netfs_stat(&netfs_n_rh_rreq); } diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 6a2033131c068e82e9934c6baba20c243dfad62f..ccd4f245cae240b812c3a7e6d44e2de32de819c5 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -170,7 +170,7 @@ struct cb_devicenotifyitem { }; struct cb_devicenotifyargs { - int ndevs; + uint32_t ndevs; struct cb_devicenotifyitem *devs; }; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 09c5b1cb3e0752ebdea7abdd322e75c539885283..c343666d9a4282cc47f37880cd40fa8096ddde98 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -358,7 +358,7 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, struct cb_process_state *cps) { struct cb_devicenotifyargs *args = argp; - int i; + uint32_t i; __be32 res = 0; struct nfs_client *clp = cps->clp; struct nfs_server *server = NULL; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index a67c41ec545fd57b6c60a4a22612d7c0fd35987c..f90de8043b0f9618307373a1ccba7905505625c1 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -258,11 +258,9 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, void *argp) { struct cb_devicenotifyargs *args = argp; + uint32_t tmp, n, i; __be32 *p; __be32 status = 0; - u32 tmp; - int n, i; - args->ndevs = 0; /* Num of device notifications */ p = xdr_inline_decode(xdr, sizeof(uint32_t)); @@ -271,7 +269,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, goto out; } n = ntohl(*p++); - if (n <= 0) + if (n == 0) goto out; if (n 
diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
index 6169659857b30391d7e76b732607c923337045d7..501da990c25905a55fd325f2167db89d4e0fb382 100644
--- a/fs/netfs/read_helper.c
+++ b/fs/netfs/read_helper.c
@@ -55,7 +55,8 @@ static struct netfs_read_request *netfs_alloc_read_request(
 		INIT_WORK(&rreq->work, netfs_rreq_work);
 		refcount_set(&rreq->usage, 1);
 		__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
-		ops->init_rreq(rreq, file);
+		if (ops->init_rreq)
+			ops->init_rreq(rreq, file);
 		netfs_stat(&netfs_n_rh_rreq);
 	}
 
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 6a2033131c068e82e9934c6baba20c243dfad62f..ccd4f245cae240b812c3a7e6d44e2de32de819c5 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -170,7 +170,7 @@ struct cb_devicenotifyitem {
 };
 
 struct cb_devicenotifyargs {
-	int ndevs;
+	uint32_t ndevs;
 	struct cb_devicenotifyitem *devs;
 };
 
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 09c5b1cb3e0752ebdea7abdd322e75c539885283..c343666d9a4282cc47f37880cd40fa8096ddde98 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -358,7 +358,7 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp,
 				  struct cb_process_state *cps)
 {
 	struct cb_devicenotifyargs *args = argp;
-	int i;
+	uint32_t i;
 	__be32 res = 0;
 	struct nfs_client *clp = cps->clp;
 	struct nfs_server *server = NULL;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index a67c41ec545fd57b6c60a4a22612d7c0fd35987c..f90de8043b0f9618307373a1ccba7905505625c1 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -258,11 +258,9 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
 			       void *argp)
 {
 	struct cb_devicenotifyargs *args = argp;
+	uint32_t tmp, n, i;
 	__be32 *p;
 	__be32 status = 0;
-	u32 tmp;
-	int n, i;
-	args->ndevs = 0;
 
 	/* Num of device notifications */
 	p = xdr_inline_decode(xdr, sizeof(uint32_t));
@@ -271,7 +269,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
 		goto out;
 	}
 	n = ntohl(*p++);
-	if (n <= 0)
+	if (n == 0)
 		goto out;
 	if (n > ULONG_MAX / sizeof(*args->devs)) {
 		status = htonl(NFS4ERR_BADXDR);
@@ -330,19 +328,21 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
 			dev->cbd_immediate = 0;
 		}
 
-		args->ndevs++;
-
 		dprintk("%s: type %d layout 0x%x immediate %d\n",
 			__func__, dev->cbd_notify_type,
 			dev->cbd_layout_type, dev->cbd_immediate);
 	}
+	args->ndevs = n;
+	dprintk("%s: ndevs %d\n", __func__, args->ndevs);
+	return 0;
+err:
+	kfree(args->devs);
 out:
+	args->devs = NULL;
+	args->ndevs = 0;
 	dprintk("%s: status %d ndevs %d\n",
 		__func__, ntohl(status), args->ndevs);
 	return status;
-err:
-	kfree(args->devs);
-	goto out;
 }
 
 static __be32 decode_sessionid(struct xdr_stream *xdr,
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 8d8b85b5a641911188785b83a3c04a24b4031c4e..d1f34229e11ab528129988eac0f0a3fb9f13a4d0 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -177,6 +177,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
 	INIT_LIST_HEAD(&clp->cl_superblocks);
 	clp->cl_rpcclient = ERR_PTR(-EINVAL);
 
+	clp->cl_flags = cl_init->init_flags;
 	clp->cl_proto = cl_init->proto;
 	clp->cl_nconnect = cl_init->nconnect;
 	clp->cl_max_connect = cl_init->max_connect ? cl_init->max_connect : 1;
@@ -423,7 +424,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
 			list_add_tail(&new->cl_share_link,
 					&nn->nfs_client_list);
 			spin_unlock(&nn->nfs_client_lock);
-			new->cl_flags = cl_init->init_flags;
 			return rpc_ops->init_client(new, cl_init);
 		}
 
@@ -856,6 +856,13 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str
 		server->namelen = pathinfo.max_namelen;
 	}
 
+	if (clp->rpc_ops->discover_trunking != NULL &&
+	    (server->caps & NFS_CAP_FS_LOCATIONS)) {
+		error = clp->rpc_ops->discover_trunking(server, mntfh);
+		if (error < 0)
+			return error;
+	}
+
 	return 0;
 }
 
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 347793626f19d7d61aa164e5d03013c5a3332283..7bc7cf6b26f091d95851d2286f63865a2129fa10 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -80,6 +80,7 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
 		ctx->dir_cookie = 0;
 		ctx->dup_cookie = 0;
 		ctx->page_index = 0;
+		ctx->eof = false;
 		spin_lock(&dir->i_lock);
 		if (list_empty(&nfsi->open_files) &&
 		    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
@@ -168,6 +169,7 @@ struct nfs_readdir_descriptor {
 	unsigned int	cache_entry_index;
 	signed char duped;
 	bool plus;
+	bool eob;
 	bool eof;
 };
 
@@ -867,7 +869,8 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
 
 		status = nfs_readdir_page_filler(desc, entry, pages, pglen,
 						 arrays, narrays);
-	} while (!status && nfs_readdir_page_needs_filling(page));
+	} while (!status && nfs_readdir_page_needs_filling(page) &&
+		page_mapping(page));
 
 	nfs_readdir_free_pages(pages, array_size);
out:
@@ -988,7 +991,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
 		ent = &array->array[i];
 		if (!dir_emit(desc->ctx, ent->name, ent->name_len,
 		    nfs_compat_user_ino64(ent->ino), ent->d_type)) {
-			desc->eof = true;
+			desc->eob = true;
 			break;
 		}
 		memcpy(desc->verf, verf, sizeof(desc->verf));
@@ -1004,7 +1007,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
 			desc->duped = 1;
 	}
 	if (array->page_is_eof)
-		desc->eof = true;
+		desc->eof = !desc->eob;
 
 	kunmap(desc->page);
 	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n",
@@ -1041,12 +1044,13 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
 		goto out;
 
 	desc->page_index = 0;
+	desc->cache_entry_index = 0;
 	desc->last_cookie = desc->dir_cookie;
 	desc->duped = 0;
 
 	status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);
 
-	for (i = 0; !desc->eof && i < sz && arrays[i]; i++) {
+	for (i = 0; !desc->eob && i < sz && arrays[i]; i++) {
 		desc->page = arrays[i];
 		nfs_do_filldir(desc, verf);
 	}
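The dir.c hunks above split what used to be a single flag in two: desc->eob records that dir_emit() stopped accepting entries (the caller's buffer filled up), while desc->eof now means the directory itself is exhausted, and is only latched when the end-of-directory page was reached with buffer space left (desc->eof = !desc->eob). A user-space sketch of that distinction, with hypothetical names (nothing here is kernel API):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    struct listing {
    	const char **names;
    	size_t count;	/* entries the "directory" holds */
    	size_t next;	/* resume cursor */
    	bool eob;	/* output buffer filled mid-listing */
    	bool eof;	/* the listing itself is exhausted */
    };

    static void fill(struct listing *ls, size_t room)
    {
    	while (ls->next < ls->count) {
    		if (room == 0) {
    			ls->eob = true;	/* full buffer, not end of dir */
    			return;
    		}
    		printf("%s\n", ls->names[ls->next++]);
    		room--;
    	}
    	/* Mirrors desc->eof = !desc->eob: only latch EOF when the end
    	 * was reached with buffer space to spare. */
    	ls->eof = !ls->eob;
    }

    int main(void)
    {
    	const char *names[] = { "a", "b", "c", "d", "e" };
    	struct listing ls = { names, 5, 0, false, false };

    	fill(&ls, 2);	/* stops early: eob=1, eof=0 */
    	ls.eob = false;	/* caller returns with a fresh buffer */
    	fill(&ls, 8);	/* drains the rest: eof=1 */
    	printf("eob=%d eof=%d\n", ls.eob, ls.eof);
    	return 0;
    }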
@@ -1105,9 +1109,15 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	desc->duped = dir_ctx->duped;
 	page_index = dir_ctx->page_index;
 	desc->attr_gencount = dir_ctx->attr_gencount;
+	desc->eof = dir_ctx->eof;
 	memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
 	spin_unlock(&file->f_lock);
 
+	if (desc->eof) {
+		res = 0;
+		goto out_free;
+	}
+
 	if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) &&
 	    list_is_singular(&nfsi->open_files))
 		invalidate_mapping_pages(inode->i_mapping, page_index + 1, -1);
@@ -1141,7 +1151,7 @@
 
 		nfs_do_filldir(desc, nfsi->cookieverf);
 		nfs_readdir_page_unlock_and_put_cached(desc);
-	} while (!desc->eof);
+	} while (!desc->eob && !desc->eof);
 
 	spin_lock(&file->f_lock);
 	dir_ctx->dir_cookie = desc->dir_cookie;
@@ -1149,9 +1159,10 @@
 	dir_ctx->duped = desc->duped;
 	dir_ctx->attr_gencount = desc->attr_gencount;
 	dir_ctx->page_index = desc->page_index;
+	dir_ctx->eof = desc->eof;
 	memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
 	spin_unlock(&file->f_lock);
-
+out_free:
 	kfree(desc);
 
out:
@@ -1193,6 +1204,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
 		if (offset == 0)
 			memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf));
 		dir_ctx->duped = 0;
+		dir_ctx->eof = false;
 	}
 	spin_unlock(&filp->f_lock);
 	return offset;
@@ -1325,6 +1337,14 @@ void nfs_clear_verifier_delegated(struct inode *inode)
 EXPORT_SYMBOL_GPL(nfs_clear_verifier_delegated);
 #endif /* IS_ENABLED(CONFIG_NFS_V4) */
 
+static int nfs_dentry_verify_change(struct inode *dir, struct dentry *dentry)
+{
+	if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE) &&
+	    d_really_is_negative(dentry))
+		return dentry->d_time == inode_peek_iversion_raw(dir);
+	return nfs_verify_change_attribute(dir, dentry->d_time);
+}
+
 /*
  * A check for whether or not the parent directory has changed.
  * In the case it has, we assume that the dentries are untrustworthy
@@ -1338,7 +1358,7 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
 		return 1;
 	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
 		return 0;
-	if (!nfs_verify_change_attribute(dir, dentry->d_time))
+	if (!nfs_dentry_verify_change(dir, dentry))
 		return 0;
 	/* Revalidate nfsi->cache_change_attribute before we declare a match */
 	if (nfs_mapping_need_revalidate_inode(dir)) {
@@ -1347,7 +1367,7 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
 		if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
 			return 0;
 	}
-	if (!nfs_verify_change_attribute(dir, dentry->d_time))
+	if (!nfs_dentry_verify_change(dir, dentry))
 		return 0;
 	return 1;
 }
@@ -1437,6 +1457,9 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
 		return 0;
 	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
 		return 1;
+	/* Case insensitive server? Revalidate negative dentries */
+	if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
+		return 1;
 	return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
 }
@@ -1537,7 +1560,7 @@ out:
 	 * If the lookup failed despite the dentry change attribute being
 	 * a match, then we should revalidate the directory cache.
 	 */
-	if (!ret && nfs_verify_change_attribute(dir, dentry->d_time))
+	if (!ret && nfs_dentry_verify_change(dir, dentry))
 		nfs_mark_dir_for_revalidate(dir);
 	return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
 }
@@ -1776,8 +1799,11 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
 	dir_verifier = nfs_save_change_attribute(dir);
 	trace_nfs_lookup_enter(dir, dentry, flags);
 	error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
-	if (error == -ENOENT)
+	if (error == -ENOENT) {
+		if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
+			dir_verifier = inode_peek_iversion_raw(dir);
 		goto no_entry;
+	}
 	if (error < 0) {
 		res = ERR_PTR(error);
 		goto out;
@@ -1806,6 +1832,14 @@ out:
 }
 EXPORT_SYMBOL_GPL(nfs_lookup);
 
+void nfs_d_prune_case_insensitive_aliases(struct inode *inode)
+{
+	/* Case insensitive server? Revalidate dentries */
+	if (inode && nfs_server_capable(inode, NFS_CAP_CASE_INSENSITIVE))
+		d_prune_aliases(inode);
+}
+EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases);
+
 #if IS_ENABLED(CONFIG_NFS_V4)
 static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
 
@@ -1867,6 +1901,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	struct iattr attr = { .ia_valid = ATTR_OPEN };
 	struct inode *inode;
 	unsigned int lookup_flags = 0;
+	unsigned long dir_verifier;
 	bool switched = false;
 	int created = 0;
 	int err;
@@ -1940,7 +1975,11 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	switch (err) {
 	case -ENOENT:
 		d_splice_alias(NULL, dentry);
-		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+		if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
+			dir_verifier = inode_peek_iversion_raw(dir);
+		else
+			dir_verifier = nfs_save_change_attribute(dir);
+		nfs_set_verifier(dentry, dir_verifier);
 		break;
 	case -EISDIR:
 	case -ENOTDIR:
@@ -1968,6 +2007,24 @@ out:
 
no_open:
 	res = nfs_lookup(dir, dentry, lookup_flags);
+	if (!res) {
+		inode = d_inode(dentry);
+		if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
+		    !S_ISDIR(inode->i_mode))
+			res = ERR_PTR(-ENOTDIR);
+		else if (inode && S_ISREG(inode->i_mode))
+			res = ERR_PTR(-EOPENSTALE);
+	} else if (!IS_ERR(res)) {
+		inode = d_inode(res);
+		if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
+		    !S_ISDIR(inode->i_mode)) {
+			dput(res);
+			res = ERR_PTR(-ENOTDIR);
+		} else if (inode && S_ISREG(inode->i_mode)) {
+			dput(res);
+			res = ERR_PTR(-EOPENSTALE);
+		}
+	}
 	if (switched) {
 		d_lookup_done(dentry);
 		if (!res)
@@ -2186,8 +2243,10 @@ static void nfs_dentry_remove_handle_error(struct inode *dir,
 	switch (error) {
 	case -ENOENT:
 		d_delete(dentry);
-		fallthrough;
+		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+		break;
 	case 0:
+		nfs_d_prune_case_insensitive_aliases(d_inode(dentry));
 		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 	}
 }
@@ -2380,6 +2439,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 
 	trace_nfs_link_enter(inode, dir, dentry);
 	d_drop(dentry);
+	if (S_ISREG(inode->i_mode))
+		nfs_sync_inode(inode);
 	error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
 	if (error == 0) {
 		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
@@ -2469,6 +2530,8 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
 		}
 	}
 
+	if (S_ISREG(old_inode->i_mode))
+		nfs_sync_inode(old_inode);
 	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
 	if (IS_ERR(task)) {
 		error = PTR_ERR(task);
@@ -2529,7 +2592,7 @@ MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache lengt
 static void
 nfs_access_free_entry(struct nfs_access_entry *entry)
 {
-	put_cred(entry->cred);
+	put_group_info(entry->group_info);
 	kfree_rcu(entry, rcu_head);
 	smp_mb__before_atomic();
 	atomic_long_dec(&nfs_access_nr_entries);
@@ -2655,6 +2718,43 @@ void nfs_access_zap_cache(struct inode *inode)
 }
 EXPORT_SYMBOL_GPL(nfs_access_zap_cache);
 
+static int access_cmp(const struct cred *a, const struct nfs_access_entry *b)
+{
+	struct group_info *ga, *gb;
+	int g;
+
+	if (uid_lt(a->fsuid, b->fsuid))
+		return -1;
+	if (uid_gt(a->fsuid, b->fsuid))
+		return 1;
+
+	if (gid_lt(a->fsgid, b->fsgid))
+		return -1;
+	if (gid_gt(a->fsgid, b->fsgid))
+		return 1;
+
+	ga = a->group_info;
+	gb = b->group_info;
+	if (ga == gb)
+		return 0;
+	if (ga == NULL)
+		return -1;
+	if (gb == NULL)
+		return 1;
+	if (ga->ngroups < gb->ngroups)
+		return -1;
+	if (ga->ngroups > gb->ngroups)
+		return 1;
+
+	for (g = 0; g < ga->ngroups; g++) {
+		if (gid_lt(ga->gid[g], gb->gid[g]))
+			return -1;
+		if (gid_gt(ga->gid[g], gb->gid[g]))
+			return 1;
+	}
+	return 0;
+}
+
 static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, const struct cred *cred)
 {
 	struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
@@ -2662,7 +2762,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co
 	while (n != NULL) {
 		struct nfs_access_entry *entry =
 			rb_entry(n, struct nfs_access_entry, rb_node);
-		int cmp = cred_fscmp(cred, entry->cred);
+		int cmp = access_cmp(cred, entry);
 
 		if (cmp < 0)
 			n = n->rb_left;
@@ -2674,7 +2774,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co
 	return NULL;
 }
 
-static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block)
+static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_access_entry *cache;
@@ -2704,8 +2804,7 @@ static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *
 		spin_lock(&inode->i_lock);
 		retry = false;
 	}
-	res->cred = cache->cred;
-	res->mask = cache->mask;
+	*mask = cache->mask;
 	list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
 	err = 0;
out:
@@ -2717,7 +2816,7 @@ out_zap:
 	return -ENOENT;
 }
 
-static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res)
+static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, u32 *mask)
 {
 	/* Only check the most recently returned cache entry,
 	 * but do it without locking.
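access_cmp() above replaces cred_fscmp() so an access-cache entry can be keyed on fsuid, fsgid and the supplementary group list alone, without pinning a whole struct cred. For the rbtree to work the comparator must define a total order: uid first, then gid, then group-list length, then each group in turn (kernel group lists are kept sorted, so equal sets compare equal). A user-space sketch of the same ordering, with simplified stand-in types for kuid_t/kgid_t:

    #include <stdio.h>

    struct key {
    	unsigned int uid, gid;
    	int ngroups;
    	const unsigned int *groups;	/* assumed sorted ascending */
    };

    static int key_cmp(const struct key *a, const struct key *b)
    {
    	int g;

    	if (a->uid != b->uid)
    		return a->uid < b->uid ? -1 : 1;
    	if (a->gid != b->gid)
    		return a->gid < b->gid ? -1 : 1;
    	if (a->ngroups != b->ngroups)
    		return a->ngroups < b->ngroups ? -1 : 1;
    	for (g = 0; g < a->ngroups; g++)
    		if (a->groups[g] != b->groups[g])
    			return a->groups[g] < b->groups[g] ? -1 : 1;
    	return 0;	/* equal keys share one cache entry */
    }

    int main(void)
    {
    	unsigned int ga[] = { 10, 20 }, gb[] = { 10, 30 };
    	struct key a = { 1000, 1000, 2, ga }, b = { 1000, 1000, 2, gb };

    	printf("%d\n", key_cmp(&a, &b));	/* -1: differs in 2nd group */
    	return 0;
    }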
@@ -2733,35 +2832,36 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre
 	lh = rcu_dereference(list_tail_rcu(&nfsi->access_cache_entry_lru));
 	cache = list_entry(lh, struct nfs_access_entry, lru);
 	if (lh == &nfsi->access_cache_entry_lru ||
-	    cred_fscmp(cred, cache->cred) != 0)
+	    access_cmp(cred, cache) != 0)
 		cache = NULL;
 	if (cache == NULL)
 		goto out;
 	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
 		goto out;
-	res->cred = cache->cred;
-	res->mask = cache->mask;
+	*mask = cache->mask;
 	err = 0;
out:
 	rcu_read_unlock();
 	return err;
 }
 
-int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct
-nfs_access_entry *res, bool may_block)
+int nfs_access_get_cached(struct inode *inode, const struct cred *cred,
+			  u32 *mask, bool may_block)
 {
 	int status;
 
-	status = nfs_access_get_cached_rcu(inode, cred, res);
+	status = nfs_access_get_cached_rcu(inode, cred, mask);
 	if (status != 0)
-		status = nfs_access_get_cached_locked(inode, cred, res,
+		status = nfs_access_get_cached_locked(inode, cred, mask,
 		    may_block);
 
 	return status;
 }
 EXPORT_SYMBOL_GPL(nfs_access_get_cached);
 
-static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set)
+static void nfs_access_add_rbtree(struct inode *inode,
+				  struct nfs_access_entry *set,
+				  const struct cred *cred)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct rb_root *root_node = &nfsi->access_cache;
@@ -2774,7 +2874,7 @@ static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *
 	while (*p != NULL) {
 		parent = *p;
 		entry = rb_entry(parent, struct nfs_access_entry, rb_node);
-		cmp = cred_fscmp(set->cred, entry->cred);
+		cmp = access_cmp(cred, entry);
 
 		if (cmp < 0)
 			p = &parent->rb_left;
@@ -2796,13 +2896,16 @@ found:
 	nfs_access_free_entry(entry);
 }
 
-void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
+void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set,
+			  const struct cred *cred)
 {
 	struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
 	if (cache == NULL)
 		return;
 	RB_CLEAR_NODE(&cache->rb_node);
-	cache->cred = get_cred(set->cred);
+	cache->fsuid = cred->fsuid;
+	cache->fsgid = cred->fsgid;
+	cache->group_info = get_group_info(cred->group_info);
 	cache->mask = set->mask;
 
 	/* The above field assignments must be visible
@@ -2810,7 +2913,7 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
 	 * use rcu_assign_pointer, so just force the memory barrier.
 	 */
 	smp_wmb();
-	nfs_access_add_rbtree(inode, cache);
+	nfs_access_add_rbtree(inode, cache, cred);
 
 	/* Update accounting */
 	smp_mb__before_atomic();
@@ -2875,7 +2978,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
 	trace_nfs_access_enter(inode);
 
-	status = nfs_access_get_cached(inode, cred, &cache, may_block);
+	status = nfs_access_get_cached(inode, cred, &cache.mask, may_block);
 	if (status == 0)
 		goto out_cached;
 
@@ -2895,8 +2998,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
 		cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
 	else
 		cache.mask |= NFS_ACCESS_EXECUTE;
-	cache.cred = cred;
-	status = NFS_PROTO(inode)->access(inode, &cache);
+	status = NFS_PROTO(inode)->access(inode, &cache, cred);
 	if (status != 0) {
 		if (status == -ESTALE) {
 			if (!S_ISDIR(inode->i_mode))
@@ -2906,7 +3008,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
 		}
 		goto out;
 	}
-	nfs_access_add_cache(inode, &cache);
+	nfs_access_add_cache(inode, &cache, cred);
out_cached:
 	cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode);
 	if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
diff --git a/fs/nfs/filelayout/filelayout.h b/fs/nfs/filelayout/filelayout.h
index 79323b5dab0cb38a212318d774273cb7ce88187f..aed0748fd6ec74e5bbc76764f6bd0218c7a10058 100644
--- a/fs/nfs/filelayout/filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h
@@ -51,7 +51,7 @@ struct nfs4_file_layout_dsaddr {
 	u32 stripe_count;
 	u8 *stripe_indices;
 	u32 ds_num;
-	struct nfs4_pnfs_ds *ds_list[1];
+	struct nfs4_pnfs_ds *ds_list[];
 };
 
 struct nfs4_filelayout_segment {
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 86c3f7e69ec423d4f13952ee2eea8622b7e8b2a0..acf4b88889dc3814ebfe06d3883c96f429415f2a 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -136,9 +136,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
 		goto out_err_free_stripe_indices;
 	}
 
-	dsaddr = kzalloc(sizeof(*dsaddr) +
-			(sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
-			gfp_flags);
+	dsaddr = kzalloc(struct_size(dsaddr, ds_list, num), gfp_flags);
 	if (!dsaddr)
 		goto out_err_free_stripe_indices;
 
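The filelayout hunks above convert the old one-element array ds_list[1] into a true C99 flexible array member and size the allocation with struct_size(), which computes sizeof(*dsaddr) plus num elements and saturates instead of wrapping on overflow, replacing the error-prone (num - 1) arithmetic. A user-space sketch of the same pattern (the explicit overflow check stands in for the kernel helper's saturation):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct dsaddr {
    	uint32_t ds_num;
    	void *ds_list[];	/* flexible array member */
    };

    static struct dsaddr *alloc_dsaddr(size_t num)
    {
    	/* struct_size(dsaddr, ds_list, num) equivalent, with wrap check */
    	if (num > (SIZE_MAX - sizeof(struct dsaddr)) / sizeof(void *))
    		return NULL;
    	return calloc(1, sizeof(struct dsaddr) + num * sizeof(void *));
    }

    int main(void)
    {
    	struct dsaddr *d = alloc_dsaddr(4);

    	if (!d)
    		return 1;
    	d->ds_num = 4;
    	printf("room for %u data-server pointers\n", (unsigned)d->ds_num);
    	free(d);
    	return 0;
    }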
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 12f6acb483bbd60b002121bc9a56bbcbc5f16b52..2de7c56a1fbedb2e1e9077c7ba5bebf5992a0954 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -373,6 +373,7 @@ extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
 extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
 					   struct shrink_control *sc);
 struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
+void nfs_d_prune_case_insensitive_aliases(struct inode *inode);
 int nfs_create(struct user_namespace *, struct inode *, struct dentry *,
 	       umode_t, bool);
 int nfs_mkdir(struct user_namespace *, struct inode *, struct dentry *,
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 7100514d306bb7a7aa330c9f9e3a1c089074ae91..1597eef40d54f6f7005d9326e154323b7f104fd3 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -220,7 +220,8 @@ static int nfs3_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
 			   task_flags);
 }
 
-static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry,
+			    const struct cred *cred)
 {
 	struct nfs3_accessargs	arg = {
 		.fh		= NFS_FH(inode),
@@ -231,7 +232,7 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_ACCESS],
 		.rpc_argp	= &arg,
 		.rpc_resp	= &res,
-		.rpc_cred	= entry->cred,
+		.rpc_cred	= cred,
 	};
 	int status = -ENOMEM;
 
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 8b21ff1be7175488975ef1029d193df2abd11b36..32129446beca6cc4c3cc0f007e88d4dde630ceeb 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -46,7 +46,7 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
 {
 	struct inode *inode = file_inode(filep);
 	struct nfs_server *server = NFS_SERVER(inode);
-	u32 bitmask[3];
+	u32 bitmask[NFS_BITMASK_SZ];
 	struct nfs42_falloc_args args = {
 		.falloc_fh	= NFS_FH(inode),
 		.falloc_offset	= offset,
@@ -69,9 +69,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
 		return status;
 	}
 
-	memcpy(bitmask, server->cache_consistency_bitmask, sizeof(bitmask));
-	if (server->attr_bitmask[1] & FATTR4_WORD1_SPACE_USED)
-		bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+	nfs4_bitmask_set(bitmask, server->cache_consistency_bitmask, inode,
+			 NFS_INO_INVALID_BLOCKS);
 
 	res.falloc_fattr = nfs_alloc_fattr();
 	if (!res.falloc_fattr)
@@ -1044,13 +1043,14 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
 	struct inode *src_inode = file_inode(src_f);
 	struct inode *dst_inode = file_inode(dst_f);
 	struct nfs_server *server = NFS_SERVER(dst_inode);
+	__u32 dst_bitmask[NFS_BITMASK_SZ];
 	struct nfs42_clone_args args = {
 		.src_fh = NFS_FH(src_inode),
 		.dst_fh = NFS_FH(dst_inode),
 		.src_offset = src_offset,
 		.dst_offset = dst_offset,
 		.count = count,
-		.dst_bitmask = server->cache_consistency_bitmask,
+		.dst_bitmask = dst_bitmask,
 	};
 	struct nfs42_clone_res res = {
 		.server	= server,
@@ -1079,6 +1079,9 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
 	if (!res.dst_fattr)
 		return -ENOMEM;
 
+	nfs4_bitmask_set(dst_bitmask, server->cache_consistency_bitmask,
+			 dst_inode, NFS_INO_INVALID_BLOCKS);
+
 	status = nfs4_call_sync(server->client, server, msg,
 				&args.seq_args, &res.seq_res, 0);
 	trace_nfs4_clone(src_inode, dst_inode, &args, status);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ed5eaca6801ee4d831595893221b7e7b9d22e289..84f39b6f1b1e9415d06a87bcdbf560bd62739548 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -260,8 +260,8 @@ struct nfs4_state_maintenance_ops {
 };
 
 struct nfs4_mig_recovery_ops {
-	int (*get_locations)(struct inode *, struct nfs4_fs_locations *,
-		struct page *, const struct cred *);
+	int (*get_locations)(struct nfs_server *, struct nfs_fh *,
+		struct nfs4_fs_locations *, struct page *, const struct cred *);
 	int (*fsid_present)(struct inode *, const struct cred *);
 };
 
@@ -280,7 +280,8 @@ struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *,
 int nfs4_submount(struct fs_context *, struct nfs_server *);
 int nfs4_replace_transport(struct nfs_server *server,
 				const struct nfs4_fs_locations *locations);
-
+size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa,
+			     size_t salen, struct net *net, int port);
 /* nfs4proc.c */
 extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *);
 extern int nfs4_async_handle_error(struct rpc_task *task,
@@ -302,8 +303,9 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait);
 extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
 extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *,
 				  struct nfs4_fs_locations *, struct page *);
-extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *,
-		struct page *page, const struct cred *);
+extern int nfs4_proc_get_locations(struct nfs_server *, struct nfs_fh *,
+				   struct nfs4_fs_locations *,
+				   struct page *page, const struct cred *);
 extern int nfs4_proc_fsid_present(struct inode *, const struct cred *);
 extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct dentry *,
@@ -315,6 +317,8 @@ extern int nfs4_set_rw_stateid(nfs4_stateid *stateid,
 		const struct nfs_open_context *ctx,
 		const struct nfs_lock_context *l_ctx,
 		fmode_t fmode);
+extern void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[],
+			     struct inode *inode, unsigned long cache_validity);
 extern int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
 			     struct nfs_fattr *fattr, struct inode *inode);
 extern int update_open_stateid(struct nfs4_state *state,
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index d8b5a250ca0501d5edc33a56d28811155f2966fe..47a6cf892c95adbbcfddad7e6f1275f11678da6f 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1343,8 +1343,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
 	}
 	nfs_put_client(clp);
 
-	if (server->nfs_client->cl_hostname == NULL)
+	if (server->nfs_client->cl_hostname == NULL) {
 		server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
+		if (server->nfs_client->cl_hostname == NULL)
+			return -ENOMEM;
+	}
 	nfs_server_insert_lists(server);
 
 	return nfs_probe_server(server, NFS_FH(d_inode(server->super->s_root)));
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 873342308dc0d9a131d93665b338e3eb1a49d6e9..3680c8da510c9787bcbae896a292a2cfec039469 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -164,16 +164,21 @@ static int nfs4_validate_fspath(struct dentry *dentry,
 	return 0;
 }
 
-static size_t nfs_parse_server_name(char *string, size_t len,
-		struct sockaddr *sa, size_t salen, struct net *net)
+size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa,
+			     size_t salen, struct net *net, int port)
 {
 	ssize_t ret;
 
 	ret = rpc_pton(net, string, len, sa, salen);
 	if (ret == 0) {
-		ret = nfs_dns_resolve_name(net, string, len, sa, salen);
-		if (ret < 0)
-			ret = 0;
+		ret = rpc_uaddr2sockaddr(net, string, len, sa, salen);
+		if (ret == 0) {
+			ret = nfs_dns_resolve_name(net, string, len, sa, salen);
+			if (ret < 0)
+				ret = 0;
+		}
+	} else if (port) {
+		rpc_set_port(sa, port);
 	}
 	return ret;
 }
@@ -328,7 +333,7 @@ static int try_location(struct fs_context *fc,
 			nfs_parse_server_name(buf->data, buf->len,
 					      &ctx->nfs_server.address,
 					      sizeof(ctx->nfs_server._address),
-					      fc->net_ns);
+					      fc->net_ns, 0);
 		if (ctx->nfs_server.addrlen == 0)
 			continue;
 
@@ -496,7 +501,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server,
 			continue;
 
 		salen = nfs_parse_server_name(buf->data, buf->len,
-						sap, addr_bufsize, net);
+						sap, addr_bufsize, net, 0);
 		if (salen == 0)
 			continue;
 		rpc_set_port(sap, NFS_PORT);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ee3bc79f6ca3a56271b4f4077be29d435e2ca915..f5020828ab651efd1d2d85eeb440839d0f6615c6 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -108,10 +108,6 @@ static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
 static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
 		const struct cred *, bool);
 #endif
-static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ],
-		const __u32 *src, struct inode *inode,
-		struct nfs_server *server,
-		struct nfs4_label *label);
 
 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
 static inline struct nfs4_label *
@@ -2653,9 +2649,8 @@ static int nfs4_opendata_access(const struct cred *cred,
 	} else if ((fmode & FMODE_READ) && !opendata->file_created)
 		mask = NFS4_ACCESS_READ;
 
-	cache.cred = cred;
 	nfs_access_set_mask(&cache, opendata->o_res.access_result);
-	nfs_access_add_cache(state->inode, &cache);
+	nfs_access_add_cache(state->inode, &cache, cred);
 
 	flags = NFS4_ACCESS_READ | NFS4_ACCESS_EXECUTE | NFS4_ACCESS_LOOKUP;
 	if ((mask & ~cache.mask & flags) == 0)
@@ -3670,7 +3665,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 		if (!nfs4_have_delegation(inode, FMODE_READ)) {
 			nfs4_bitmask_set(calldata->arg.bitmask_store,
 					 server->cache_consistency_bitmask,
-					 inode, server, NULL);
+					 inode, 0);
 			calldata->arg.bitmask = calldata->arg.bitmask_store;
 		} else
 			calldata->arg.bitmask = NULL;
@@ -3841,7 +3836,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 		     FATTR4_WORD0_FH_EXPIRE_TYPE |
 		     FATTR4_WORD0_LINK_SUPPORT |
 		     FATTR4_WORD0_SYMLINK_SUPPORT |
-		     FATTR4_WORD0_ACLSUPPORT;
+		     FATTR4_WORD0_ACLSUPPORT |
+		     FATTR4_WORD0_CASE_INSENSITIVE |
+		     FATTR4_WORD0_CASE_PRESERVING;
 	if (minorversion)
 		bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT;
 
@@ -3870,10 +3867,16 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
 			server->caps |= NFS_CAP_HARDLINKS;
 		if (res.has_symlinks != 0)
 			server->caps |= NFS_CAP_SYMLINKS;
+		if (res.case_insensitive)
+			server->caps |= NFS_CAP_CASE_INSENSITIVE;
+		if (res.case_preserving)
+			server->caps |= NFS_CAP_CASE_PRESERVING;
 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
 		if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL)
 			server->caps |= NFS_CAP_SECURITY_LABEL;
 #endif
+		if (res.attr_bitmask[0] & FATTR4_WORD0_FS_LOCATIONS)
+			server->caps |= NFS_CAP_FS_LOCATIONS;
 		if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID))
 			server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID;
 		if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE))
@@ -3932,6 +3935,114 @@ int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
 	return err;
 }
 
+static void test_fs_location_for_trunking(struct nfs4_fs_location *location,
+					  struct nfs_client *clp,
+					  struct nfs_server *server)
+{
+	int i;
+
+	for (i = 0; i < location->nservers; i++) {
+		struct nfs4_string *srv_loc = &location->servers[i];
+		struct sockaddr addr;
+		size_t addrlen;
+		struct xprt_create xprt_args = {
+			.ident = 0,
+			.net = clp->cl_net,
+		};
+		struct nfs4_add_xprt_data xprtdata = {
+			.clp = clp,
+		};
+		struct rpc_add_xprt_test rpcdata = {
+			.add_xprt_test = clp->cl_mvops->session_trunk,
+			.data = &xprtdata,
+		};
+		char *servername = NULL;
+
+		if (!srv_loc->len)
+			continue;
+
+		addrlen = nfs_parse_server_name(srv_loc->data, srv_loc->len,
+						&addr, sizeof(addr),
+						clp->cl_net, server->port);
+		if (!addrlen)
+			return;
+		xprt_args.dstaddr = &addr;
+		xprt_args.addrlen = addrlen;
+		servername = kmalloc(srv_loc->len + 1, GFP_KERNEL);
+		if (!servername)
+			return;
+		memcpy(servername, srv_loc->data, srv_loc->len);
+		servername[srv_loc->len] = '\0';
+		xprt_args.servername = servername;
+
+		xprtdata.cred = nfs4_get_clid_cred(clp);
+		rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
+				  rpc_clnt_setup_test_and_add_xprt,
+				  &rpcdata);
+		if (xprtdata.cred)
+			put_cred(xprtdata.cred);
+		kfree(servername);
+	}
+}
+
+static int _nfs4_discover_trunking(struct nfs_server *server,
+				   struct nfs_fh *fhandle)
+{
+	struct nfs4_fs_locations *locations = NULL;
+	struct page *page;
+	const struct cred *cred;
+	struct nfs_client *clp = server->nfs_client;
+	const struct nfs4_state_maintenance_ops *ops =
+		clp->cl_mvops->state_renewal_ops;
+	int status = -ENOMEM, i;
+
+	cred = ops->get_state_renewal_cred(clp);
+	if (cred == NULL) {
+		cred = nfs4_get_clid_cred(clp);
+		if (cred == NULL)
+			return -ENOKEY;
+	}
+
+	page = alloc_page(GFP_KERNEL);
+	locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
+	if (page == NULL || locations == NULL)
+		goto out;
+
+	status = nfs4_proc_get_locations(server, fhandle, locations, page,
+					 cred);
+	if (status)
+		goto out;
+
+	for (i = 0; i < locations->nlocations; i++)
+		test_fs_location_for_trunking(&locations->locations[i], clp,
+					      server);
+out:
+	if (page)
+		__free_page(page);
+	kfree(locations);
+	return status;
+}
+
+static int nfs4_discover_trunking(struct nfs_server *server,
+				  struct nfs_fh *fhandle)
+{
+	struct nfs4_exception exception = {
+		.interruptible = true,
+	};
+	struct nfs_client *clp = server->nfs_client;
+	int err = 0;
+
+	if (!nfs4_has_session(clp))
+		goto out;
+	do {
+		err = nfs4_handle_exception(server,
+				_nfs4_discover_trunking(server, fhandle),
+				&exception);
+	} while (exception.retry);
+out:
+	return err;
+}
+
 static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
 		struct nfs_fsinfo *info)
 {
@@ -4441,7 +4552,8 @@ static int nfs4_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
 	return err;
 }
 
-static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry,
+			     const struct cred *cred)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_accessargs args = {
@@ -4455,7 +4567,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
 		.rpc_argp = &args,
 		.rpc_resp = &res,
-		.rpc_cred = entry->cred,
+		.rpc_cred = cred,
 	};
 	int status = 0;
 
@@ -4475,14 +4587,15 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
 	return status;
 }
 
-static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
+static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry,
+			    const struct cred *cred)
 {
 	struct nfs4_exception exception = {
 		.interruptible = true,
 	};
 	int err;
 	do {
-		err = _nfs4_proc_access(inode, entry);
+		err = _nfs4_proc_access(inode, entry, cred);
 		trace_nfs4_access(inode, err);
 		err = nfs4_handle_exception(NFS_SERVER(inode), err,
 				&exception);
@@ -4663,8 +4776,10 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg,
 
 	nfs_fattr_init(res->dir_attr);
 
-	if (inode)
+	if (inode) {
 		nfs4_inode_return_delegation(inode);
+		nfs_d_prune_case_insensitive_aliases(inode);
+	}
 }
 
 static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data)
@@ -4730,6 +4845,7 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
 		return 0;
 
 	if (task->tk_status == 0) {
+		nfs_d_prune_case_insensitive_aliases(d_inode(data->old_dentry));
 		if (new_dir != old_dir) {
 			/* Note: If we moved a directory, nlink will change */
 			nfs4_update_changeattr(old_dir, &res->old_cinfo,
@@ -5422,14 +5538,14 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
 	return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
 }
 
-static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src,
-			     struct inode *inode, struct nfs_server *server,
-			     struct nfs4_label *label)
+void nfs4_bitmask_set(__u32 bitmask[], const __u32 src[],
+		      struct inode *inode, unsigned long cache_validity)
 {
-	unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+	struct nfs_server *server = NFS_SERVER(inode);
 	unsigned int i;
 
 	memcpy(bitmask, src, sizeof(*bitmask) * NFS4_BITMASK_SZ);
+	cache_validity |= READ_ONCE(NFS_I(inode)->cache_validity);
 
 	if (cache_validity & NFS_INO_INVALID_CHANGE)
 		bitmask[0] |= FATTR4_WORD0_CHANGE;
@@ -5441,8 +5557,6 @@ static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src,
 		bitmask[1] |= FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP;
 	if (cache_validity & NFS_INO_INVALID_NLINK)
 		bitmask[1] |= FATTR4_WORD1_NUMLINKS;
-	if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL)
-		bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
 	if (cache_validity & NFS_INO_INVALID_CTIME)
 		bitmask[1] |= FATTR4_WORD1_TIME_METADATA;
 	if (cache_validity & NFS_INO_INVALID_MTIME)
@@ -5469,7 +5583,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
 	} else {
 		nfs4_bitmask_set(hdr->args.bitmask_store,
 				 server->cache_consistency_bitmask,
-				 hdr->inode, server, NULL);
+				 hdr->inode, NFS_INO_INVALID_BLOCKS);
 		hdr->args.bitmask = hdr->args.bitmask_store;
 	}
 
@@ -6507,8 +6621,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
 	data->args.fhandle = &data->fh;
 	data->args.stateid = &data->stateid;
 	nfs4_bitmask_set(data->args.bitmask_store,
-			 server->cache_consistency_bitmask, inode, server,
-			 NULL);
+			 server->cache_consistency_bitmask, inode, 0);
 	data->args.bitmask = data->args.bitmask_store;
 	nfs_copy_fh(&data->fh, NFS_FH(inode));
 	nfs4_stateid_copy(&data->stateid, stateid);
@@ -7611,7 +7724,7 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
 				    const char *key, const void *buf,
 				    size_t buflen, int flags)
 {
-	struct nfs_access_entry cache;
+	u32 mask;
 	int ret;
 
 	if (!nfs_server_capable(inode, NFS_CAP_XATTR))
@@ -7626,8 +7739,8 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
 	 * do a cached access check for the XA* flags to possibly avoid
 	 * doing an RPC and getting EACCES back.
 	 */
-	if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
-		if (!(cache.mask & NFS_ACCESS_XAWRITE))
+	if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
+		if (!(mask & NFS_ACCESS_XAWRITE))
 			return -EACCES;
 	}
 
@@ -7648,14 +7761,14 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler,
 				    struct dentry *unused, struct inode *inode,
 				    const char *key, void *buf, size_t buflen)
 {
-	struct nfs_access_entry cache;
+	u32 mask;
 	ssize_t ret;
 
 	if (!nfs_server_capable(inode, NFS_CAP_XATTR))
 		return -EOPNOTSUPP;
 
-	if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
-		if (!(cache.mask & NFS_ACCESS_XAREAD))
+	if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
+		if (!(mask & NFS_ACCESS_XAREAD))
 			return -EACCES;
 	}
 
@@ -7680,13 +7793,13 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len)
 	ssize_t ret, size;
 	char *buf;
 	size_t buflen;
-	struct nfs_access_entry cache;
+	u32 mask;
 
 	if (!nfs_server_capable(inode, NFS_CAP_XATTR))
 		return 0;
 
-	if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) {
-		if (!(cache.mask & NFS_ACCESS_XALIST))
+	if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) {
+		if (!(mask & NFS_ACCESS_XALIST))
 			return 0;
 	}
 
@@ -7818,18 +7931,18 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir,
 * appended to this compound to identify the client ID which is
 * performing recovery.
 */
-static int _nfs40_proc_get_locations(struct inode *inode,
+static int _nfs40_proc_get_locations(struct nfs_server *server,
+				     struct nfs_fh *fhandle,
 				     struct nfs4_fs_locations *locations,
 				     struct page *page, const struct cred *cred)
 {
-	struct nfs_server *server = NFS_SERVER(inode);
 	struct rpc_clnt *clnt = server->client;
 	u32 bitmask[2] = {
 		[0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
 	};
 	struct nfs4_fs_locations_arg args = {
 		.clientid	= server->nfs_client->cl_clientid,
-		.fh		= NFS_FH(inode),
+		.fh		= fhandle,
 		.page		= page,
 		.bitmask	= bitmask,
 		.migration	= 1,		/* skip LOOKUP */
@@ -7875,17 +7988,17 @@ static int _nfs40_proc_get_locations(struct inode *inode,
 * When the client supports GETATTR(fs_locations_info), it can
 * be plumbed in here.
 */
-static int _nfs41_proc_get_locations(struct inode *inode,
+static int _nfs41_proc_get_locations(struct nfs_server *server,
+				     struct nfs_fh *fhandle,
 				     struct nfs4_fs_locations *locations,
 				     struct page *page, const struct cred *cred)
 {
-	struct nfs_server *server = NFS_SERVER(inode);
 	struct rpc_clnt *clnt = server->client;
 	u32 bitmask[2] = {
 		[0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
 	};
 	struct nfs4_fs_locations_arg args = {
-		.fh		= NFS_FH(inode),
+		.fh		= fhandle,
 		.page		= page,
 		.bitmask	= bitmask,
 		.migration	= 1,		/* skip LOOKUP */
@@ -7919,7 +8032,8 @@ static int _nfs41_proc_get_locations(struct inode *inode,
 
 /**
  * nfs4_proc_get_locations - discover locations for a migrated FSID
- * @inode: inode on FSID that is migrating
+ * @server: pointer to nfs_server to process
+ * @fhandle: pointer to the kernel NFS client file handle
 * @locations: result of query
 * @page: buffer
 * @cred: credential to use for this operation
@@ -7934,11 +8048,11 @@ static int _nfs41_proc_get_locations(struct inode *inode,
 * -NFS4ERR_LEASE_MOVED is returned if the server still has leases
 * from this client that require migration recovery.
 */
-int nfs4_proc_get_locations(struct inode *inode,
+int nfs4_proc_get_locations(struct nfs_server *server,
+			    struct nfs_fh *fhandle,
 			    struct nfs4_fs_locations *locations,
 			    struct page *page, const struct cred *cred)
 {
-	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs_client *clp = server->nfs_client;
 	const struct nfs4_mig_recovery_ops *ops =
 					clp->cl_mvops->mig_recovery_ops;
@@ -7951,10 +8065,11 @@ int nfs4_proc_get_locations(struct inode *inode,
 		(unsigned long long)server->fsid.major,
 		(unsigned long long)server->fsid.minor,
 		clp->cl_hostname);
-	nfs_display_fhandle(NFS_FH(inode), __func__);
+	nfs_display_fhandle(fhandle, __func__);
 
 	do {
-		status = ops->get_locations(inode, locations, page, cred);
+		status = ops->get_locations(server, fhandle, locations, page,
+					    cred);
 		if (status != -NFS4ERR_DELAY)
 			break;
 		nfs4_handle_exception(server, status, &exception);
@@ -10423,6 +10538,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
 	.free_client	= nfs4_free_client,
 	.create_server	= nfs4_create_server,
 	.clone_server	= nfs_clone_server,
+	.discover_trunking	= nfs4_discover_trunking,
 };
 
 static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index d88b779f9dd0a99a57bbbd3899d019edfa5c7c26..f5a62c0d999b436891f36a9a1813dee569075154 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -2098,7 +2098,8 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
 	}
 
 	inode = d_inode(server->super->s_root);
-	result = nfs4_proc_get_locations(inode, locations, page, cred);
+	result = nfs4_proc_get_locations(server, NFS_FH(inode), locations,
+					 page, cred);
 	if (result) {
 		dprintk("<-- %s: failed to retrieve fs_locations: %d\n",
 			__func__, result);
@@ -2106,6 +2107,9 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred
 	}
 
 	result = -NFS4ERR_NXIO;
+	if (!locations->nlocations)
+		goto out;
+
 	if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) {
 		dprintk("<-- %s: No fs_locations data, migration skipped\n",
 			__func__);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 69862bf6db0015cee29e0a7907ce9e728f157432..8e70b92df4cc8a946674af1a58c32c0cf634c9e0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3533,6 +3533,42 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
 	return 0;
 }
 
+static int decode_attr_case_insensitive(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+	__be32 *p;
+
+	*res = 0;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_CASE_INSENSITIVE - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_CASE_INSENSITIVE)) {
+		p = xdr_inline_decode(xdr, 4);
+		if (unlikely(!p))
+			return -EIO;
+		*res = be32_to_cpup(p);
+		bitmap[0] &= ~FATTR4_WORD0_CASE_INSENSITIVE;
+	}
+	dprintk("%s: case_insensitive=%s\n", __func__, *res == 0 ? "false" : "true");
+	return 0;
+}
+
+static int decode_attr_case_preserving(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
+{
+	__be32 *p;
+
+	*res = 0;
+	if (unlikely(bitmap[0] & (FATTR4_WORD0_CASE_PRESERVING - 1U)))
+		return -EIO;
+	if (likely(bitmap[0] & FATTR4_WORD0_CASE_PRESERVING)) {
+		p = xdr_inline_decode(xdr, 4);
+		if (unlikely(!p))
+			return -EIO;
+		*res = be32_to_cpup(p);
+		bitmap[0] &= ~FATTR4_WORD0_CASE_PRESERVING;
+	}
+	dprintk("%s: case_preserving=%s\n", __func__, *res == 0 ? "false" : "true");
+	return 0;
+}
+
 static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
 {
 	__be32 *p;
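decode_attr_case_insensitive() and decode_attr_case_preserving() above follow the standard shape of an NFSv4 attribute decoder: attributes appear on the wire in ascending bit order, so finding any lower-numbered bit still pending in the bitmap is a protocol error; otherwise the decoder consumes one 4-byte XDR word only if its bit is present, and then clears the bit. A simplified user-space sketch of that shape (the bit value and names are illustrative, not the real FATTR4 constants):

    #include <arpa/inet.h>	/* ntohl, htonl */
    #include <stdint.h>
    #include <stdio.h>

    #define ATTR_CASE_INSENSITIVE	(1u << 23)	/* illustrative bit */

    static int decode_bool_attr(uint32_t *bitmap, const uint32_t **xdr,
    			    uint32_t bit, uint32_t *res)
    {
    	*res = 0;
    	if (bitmap[0] & (bit - 1))
    		return -1;		/* earlier attribute not consumed */
    	if (bitmap[0] & bit) {
    		*res = ntohl(*(*xdr)++);	/* one 4-byte XDR word */
    		bitmap[0] &= ~bit;	/* mark it consumed */
    	}
    	return 0;
    }

    int main(void)
    {
    	uint32_t words[] = { htonl(1) };
    	const uint32_t *p = words;
    	uint32_t bitmap[1] = { ATTR_CASE_INSENSITIVE };
    	uint32_t ci;

    	if (decode_bool_attr(bitmap, &p, ATTR_CASE_INSENSITIVE, &ci))
    		return 1;
    	printf("case_insensitive=%s\n", ci ? "true" : "false");
    	return 0;
    }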
"false" : "true"); + return 0; +} + static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) { __be32 *p; @@ -3696,8 +3732,6 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st if (unlikely(!p)) goto out_eio; n = be32_to_cpup(p); - if (n <= 0) - goto out_eio; for (res->nlocations = 0; res->nlocations < n; res->nlocations++) { u32 m; struct nfs4_fs_location *loc; @@ -4200,10 +4234,11 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, } else printk(KERN_WARNING "%s: label too long (%u)!\n", __func__, len); + if (label && label->label) + dprintk("%s: label=%.*s, len=%d, PI=%d, LFS=%d\n", + __func__, label->len, (char *)label->label, + label->len, label->pi, label->lfs); } - if (label && label->label) - dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__, - (char *)label->label, label->len, label->pi, label->lfs); return status; } @@ -4412,6 +4447,10 @@ static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_re goto xdr_error; if ((status = decode_attr_aclsupport(xdr, bitmap, &res->acl_bitmask)) != 0) goto xdr_error; + if ((status = decode_attr_case_insensitive(xdr, bitmap, &res->case_insensitive)) != 0) + goto xdr_error; + if ((status = decode_attr_case_preserving(xdr, bitmap, &res->case_preserving)) != 0) + goto xdr_error; if ((status = decode_attr_exclcreat_supported(xdr, bitmap, res->exclcreat_bitmask)) != 0) goto xdr_error; diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c index 8cb70755e3c9eaaaae9fec04e9e9789d44ebc4ba..a6f740366963100b24a8fd8d6cdac139385283b3 100644 --- a/fs/nfs/sysfs.c +++ b/fs/nfs/sysfs.c @@ -142,10 +142,11 @@ static struct attribute *nfs_netns_client_attrs[] = { &nfs_netns_client_id.attr, NULL, }; +ATTRIBUTE_GROUPS(nfs_netns_client); static struct kobj_type nfs_netns_client_type = { .release = nfs_netns_client_release, - .default_attrs = nfs_netns_client_attrs, + .default_groups = nfs_netns_client_groups, .sysfs_ops = &kobj_sysfs_ops, .namespace = nfs_netns_client_namespace, }; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 8ef53f6726ec8b26b9f027c73785141e86fdb1fe..936eebd4c56dca2e92e6172cf6d2781e49cb13c5 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -150,13 +150,17 @@ nfsd3_proc_read(struct svc_rqst *rqstp) unsigned int len; int v; - argp->count = min_t(u32, argp->count, max_blocksize); - dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n", SVCFH_fmt(&argp->fh), (unsigned long) argp->count, (unsigned long long) argp->offset); + argp->count = min_t(u32, argp->count, max_blocksize); + if (argp->offset > (u64)OFFSET_MAX) + argp->offset = (u64)OFFSET_MAX; + if (argp->offset + argp->count > (u64)OFFSET_MAX) + argp->count = (u64)OFFSET_MAX - argp->offset; + v = 0; len = argp->count; resp->pages = rqstp->rq_next_page; @@ -199,6 +203,11 @@ nfsd3_proc_write(struct svc_rqst *rqstp) (unsigned long long) argp->offset, argp->stable? 
" stable" : ""); + resp->status = nfserr_fbig; + if (argp->offset > (u64)OFFSET_MAX || + argp->offset + argp->len > (u64)OFFSET_MAX) + return rpc_success; + fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; nvecs = svc_fill_write_vector(rqstp, &argp->payload); @@ -651,15 +660,9 @@ nfsd3_proc_commit(struct svc_rqst *rqstp) argp->count, (unsigned long long) argp->offset); - if (argp->offset > NFS_OFFSET_MAX) { - resp->status = nfserr_inval; - goto out; - } - fh_copy(&resp->fh, &argp->fh); resp->status = nfsd_commit(rqstp, &resp->fh, argp->offset, argp->count, resp->verf); -out: return rpc_success; } diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 7c45ba4db61be641078e509dbe2ed3705eee4c21..0293b8d65f10fc948c212f653dd9f3e4121a3b48 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -254,7 +254,7 @@ svcxdr_decode_sattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr, if (xdr_stream_decode_u64(xdr, &newsize) < 0) return false; iap->ia_valid |= ATTR_SIZE; - iap->ia_size = min_t(u64, newsize, NFS_OFFSET_MAX); + iap->ia_size = newsize; } if (xdr_stream_decode_u32(xdr, &set_it) < 0) return false; @@ -1060,7 +1060,7 @@ svcxdr_encode_entry3_common(struct nfsd3_readdirres *resp, const char *name, return false; /* cookie */ resp->cookie_offset = dirlist->len; - if (xdr_stream_encode_u64(xdr, NFS_OFFSET_MAX) < 0) + if (xdr_stream_encode_u64(xdr, OFFSET_MAX) < 0) return false; return true; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ed1ee25647befa80f0aed4f374fe8e33ee4f724d..b207c76a873f3a86ca095c035505b4e07f82f61a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -782,12 +782,16 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, __be32 status; read->rd_nf = NULL; - if (read->rd_offset >= OFFSET_MAX) - return nfserr_inval; trace_nfsd_read_start(rqstp, &cstate->current_fh, read->rd_offset, read->rd_length); + read->rd_length = min_t(u32, read->rd_length, svc_max_payload(rqstp)); + if (read->rd_offset > (u64)OFFSET_MAX) + read->rd_offset = (u64)OFFSET_MAX; + if (read->rd_offset + read->rd_length > (u64)OFFSET_MAX) + read->rd_length = (u64)OFFSET_MAX - read->rd_offset; + /* * If we do a zero copy read, then a client will see read data * that reflects the state of the file *after* performing the @@ -1018,8 +1022,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, unsigned long cnt; int nvecs; - if (write->wr_offset >= OFFSET_MAX) - return nfserr_inval; + if (write->wr_offset > (u64)OFFSET_MAX || + write->wr_offset + write->wr_buflen > (u64)OFFSET_MAX) + return nfserr_fbig; cnt = write->wr_buflen; trace_nfsd_write_start(rqstp, &cstate->current_fh, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 72900b89cf84cf0bcbe981701ebc764847fd6c05..32063733443d4ed93d39b43fb0e5f5ced1c0d5bf 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4130,8 +4130,10 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, status = nfserr_clid_inuse; if (client_has_state(old) && !same_creds(&unconf->cl_cred, - &old->cl_cred)) + &old->cl_cred)) { + old = NULL; goto out; + } status = mark_client_expired_locked(old); if (status) { old = NULL; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 899de438e52901d1509ec270288e8ca12ef49638..714a3a3bd50c3d524fc946f71ae18c694de0368d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3495,7 +3495,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, p = xdr_reserve_space(xdr, 3*4 + namlen); if (!p) goto fail; - p = xdr_encode_hyper(p, NFS_OFFSET_MAX); 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 72900b89cf84cf0bcbe981701ebc764847fd6c05..32063733443d4ed93d39b43fb0e5f5ced1c0d5bf 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4130,8 +4130,10 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
 			status = nfserr_clid_inuse;
 			if (client_has_state(old)
 					&& !same_creds(&unconf->cl_cred,
-							&old->cl_cred))
+							&old->cl_cred)) {
+				old = NULL;
 				goto out;
+			}
 			status = mark_client_expired_locked(old);
 			if (status) {
 				old = NULL;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 899de438e52901d1509ec270288e8ca12ef49638..714a3a3bd50c3d524fc946f71ae18c694de0368d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3495,7 +3495,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
 	p = xdr_reserve_space(xdr, 3*4 + namlen);
 	if (!p)
 		goto fail;
-	p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
+	p = xdr_encode_hyper(p, OFFSET_MAX);        /* offset of next entry */
 	p = xdr_encode_array(p, name, namlen);      /* name length & name */
 
 	nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
@@ -3986,10 +3986,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
 	}
 	xdr_commit_encode(xdr);
 
-	maxcount = svc_max_payload(resp->rqstp);
-	maxcount = min_t(unsigned long, maxcount,
+	maxcount = min_t(unsigned long, read->rd_length,
 			 (xdr->buf->buflen - xdr->buf->len));
-	maxcount = min_t(unsigned long, maxcount, read->rd_length);
 
 	if (file->f_op->splice_read &&
 	    test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags))
@@ -4826,10 +4824,8 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
 		return nfserr_resource;
 	xdr_commit_encode(xdr);
 
-	maxcount = svc_max_payload(resp->rqstp);
-	maxcount = min_t(unsigned long, maxcount,
+	maxcount = min_t(unsigned long, read->rd_length,
 			 (xdr->buf->buflen - xdr->buf->len));
-	maxcount = min_t(unsigned long, maxcount, read->rd_length);
 	count    = maxcount;
 
 	eof = read->rd_offset >= i_size_read(file_inode(file));
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index b9f27fbcd7684ed8121d82e625f8e037c3d32b7c..68b020f2002b7cda75bf371ca620ffb3d06e129b 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1247,7 +1247,8 @@ static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry)
 	clear_ncl(d_inode(dentry));
 	dget(dentry);
 	ret = simple_unlink(dir, dentry);
-	d_delete(dentry);
+	d_drop(dentry);
+	fsnotify_unlink(dir, dentry);
 	dput(dentry);
 	WARN_ON_ONCE(ret);
 }
@@ -1338,8 +1339,8 @@ void nfsd_client_rmdir(struct dentry *dentry)
 	dget(dentry);
 	ret = simple_rmdir(dir, dentry);
 	WARN_ON_ONCE(ret);
+	d_drop(dentry);
 	fsnotify_rmdir(dir, dentry);
-	d_delete(dentry);
 	dput(dentry);
 	inode_unlock(dir);
 }
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index c4cf56327843024ef0acac0c5211dd3b1903962c..5889db66409df5ae030ded1de830f1d3d6ccd247 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -306,14 +306,14 @@ TRACE_EVENT(nfsd_export_update,
 DECLARE_EVENT_CLASS(nfsd_io_class,
 	TP_PROTO(struct svc_rqst *rqstp,
 		 struct svc_fh	*fhp,
-		 loff_t		offset,
-		 unsigned long	len),
+		 u64		offset,
+		 u32		len),
 	TP_ARGS(rqstp, fhp, offset, len),
 	TP_STRUCT__entry(
 		__field(u32, xid)
 		__field(u32, fh_hash)
-		__field(loff_t, offset)
-		__field(unsigned long, len)
+		__field(u64, offset)
+		__field(u32, len)
 	),
 	TP_fast_assign(
 		__entry->xid = be32_to_cpu(rqstp->rq_xid);
@@ -321,7 +321,7 @@ DECLARE_EVENT_CLASS(nfsd_io_class,
 		__entry->offset = offset;
 		__entry->len = len;
 	),
-	TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld len=%lu",
+	TP_printk("xid=0x%08x fh_hash=0x%08x offset=%llu len=%u",
 		  __entry->xid, __entry->fh_hash,
 		  __entry->offset, __entry->len)
 )
@@ -330,8 +330,8 @@ DECLARE_EVENT_CLASS(nfsd_io_class,
 DEFINE_EVENT(nfsd_io_class, nfsd_##name,	\
 	TP_PROTO(struct svc_rqst *rqstp,	\
 		 struct svc_fh	*fhp,		\
-		 loff_t		offset,		\
-		 unsigned long	len),		\
+		 u64		offset,		\
+		 u32		len),		\
 	TP_ARGS(rqstp, fhp, offset, len))
 
 DEFINE_NFSD_IO_EVENT(read_start);
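The trace.h hunk above switches the I/O event fields from loff_t/unsigned long to u64/u32, so the recorded layout and the printk format string agree on every architecture (long is 32 bits on 32-bit kernels). The same discipline in plain C pairs fixed-width types with their matching format macros; a trivial illustration:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
    	uint64_t offset = 4096;	/* one width everywhere */
    	uint32_t len = 512;

    	printf("offset=%" PRIu64 " len=%" PRIu32 "\n", offset, len);
    	return 0;
    }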
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 99c2b9dfbb1046bdb99b4bcf3c806858c9e7cae9..91600e71be19183f82158663540caa2b55d02db2 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -435,6 +435,10 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
 			.ia_size	= iap->ia_size,
 		};
 
+		host_err = -EFBIG;
+		if (iap->ia_size < 0)
+			goto out_unlock;
+
 		host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL);
 		if (host_err)
 			goto out_unlock;
@@ -1110,42 +1114,61 @@ out:
 }
 
 #ifdef CONFIG_NFSD_V3
-/*
- * Commit all pending writes to stable storage.
+/**
+ * nfsd_commit - Commit pending writes to stable storage
+ * @rqstp: RPC request being processed
+ * @fhp: NFS filehandle
+ * @offset: raw offset from beginning of file
+ * @count: raw count of bytes to sync
+ * @verf: filled in with the server's current write verifier
 *
- * Note: we only guarantee that data that lies within the range specified
- * by the 'offset' and 'count' parameters will be synced.
+ * Note: we guarantee that data that lies within the range specified
+ * by the 'offset' and 'count' parameters will be synced. The server
+ * is permitted to sync data that lies outside this range at the
+ * same time.
 *
 * Unfortunately we cannot lock the file to make sure we return full WCC
 * data to the client, as locking happens lower down in the filesystem.
+ *
+ * Return values:
+ *   An nfsstat value in network byte order.
 */
 __be32
-nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
-	    loff_t offset, unsigned long count, __be32 *verf)
+nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, u64 offset,
+	    u32 count, __be32 *verf)
 {
+	u64 maxbytes;
+	loff_t start, end;
 	struct nfsd_net *nn;
 	struct nfsd_file *nf;
-	loff_t end = LLONG_MAX;
-	__be32 err = nfserr_inval;
-
-	if (offset < 0)
-		goto out;
-	if (count != 0) {
-		end = offset + (loff_t)count - 1;
-		if (end < offset)
-			goto out;
-	}
+	__be32 err;
 
 	err = nfsd_file_acquire(rqstp, fhp,
 			NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &nf);
 	if (err)
 		goto out;
+
+	/*
+	 * Convert the client-provided (offset, count) range to a
+	 * (start, end) range. If the client-provided range falls
+	 * outside the maximum file size of the underlying FS,
+	 * clamp the sync range appropriately.
+	 */
+	start = 0;
+	end = LLONG_MAX;
+	maxbytes = (u64)fhp->fh_dentry->d_sb->s_maxbytes;
+	if (offset < maxbytes) {
+		start = offset;
+		if (count && (offset + count - 1 < maxbytes))
+			end = offset + count - 1;
+	}
+
 	nn = net_generic(nf->nf_net, nfsd_net_id);
 	if (EX_ISSYNC(fhp->fh_export)) {
 		errseq_t since = READ_ONCE(nf->nf_file->f_wb_err);
 		int err2;
 
-		err2 = vfs_fsync_range(nf->nf_file, offset, end, 0);
+		err2 = vfs_fsync_range(nf->nf_file, start, end, 0);
 		switch (err2) {
 		case 0:
 			nfsd_copy_write_verifier(verf, nn);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 9f56dcb22ff72adeb34fa71fe842e2815086b7c1..2c43d10e3cab419642cd0b28de4c3e55ff1d7cc2 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -74,8 +74,8 @@ __be32		do_nfsd_create(struct svc_rqst *, struct svc_fh *,
 				char *name, int len, struct iattr *attrs,
 				struct svc_fh *res, int createmode,
 				u32 *verifier, bool *truncp, bool *created);
-__be32		nfsd_commit(struct svc_rqst *, struct svc_fh *,
-				loff_t, unsigned long, __be32 *verf);
+__be32		nfsd_commit(struct svc_rqst *rqst, struct svc_fh *fhp,
+				u64 offset, u32 count, __be32 *verf);
 #endif /* CONFIG_NFSD_V3 */
 #ifdef CONFIG_NFSD_V4
 __be32		nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index d5ebebb034ffe4d79fd185020e0f8bd23b50fd00..829dd4a61b6691ded306d4148490d448fc6551f6 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -19,7 +19,25 @@
 #include
 #include
 
-int dir_notify_enable __read_mostly = 1;
+static int dir_notify_enable __read_mostly = 1;
+#ifdef CONFIG_SYSCTL
+static struct ctl_table dnotify_sysctls[] = {
+	{
+		.procname	= "dir-notify-enable",
+		.data		= &dir_notify_enable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{}
+};
+static void __init dnotify_sysctl_init(void)
+{
+	register_sysctl_init("fs", dnotify_sysctls);
+}
+#else
+#define dnotify_sysctl_init() do { } while (0)
+#endif
 
 static struct kmem_cache *dnotify_struct_cache __read_mostly;
 static struct kmem_cache *dnotify_mark_cache __read_mostly;
@@ -386,6 +404,7 @@ static int __init dnotify_init(void)
 	dnotify_group = fsnotify_alloc_group(&dnotify_fsnotify_ops);
 	if (IS_ERR(dnotify_group))
 		panic("unable to allocate fsnotify group for dnotify\n");
+	dnotify_sysctl_init();
 	return 0;
 }
 
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 73a3e939c9216004b496ae4936e69c3166865818..2ff6bd85ba8f6abc70deaf9fd5192ef9cf2419b8 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -59,7 +59,7 @@ static int fanotify_max_queued_events __read_mostly;
 static long ft_zero = 0;
 static long ft_int_max = INT_MAX;
 
-struct ctl_table fanotify_table[] = {
+static struct ctl_table fanotify_table[] = {
 	{
 		.procname	= "max_user_groups",
 		.data	= &init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS],
@@ -88,6 +88,13 @@ struct ctl_table fanotify_table[] = {
 	},
 	{ }
 };
+
+static void __init fanotify_sysctls_init(void)
+{
+	register_sysctl("fs/fanotify", fanotify_table);
+}
+#else
+#define fanotify_sysctls_init() do { } while (0)
 #endif /* CONFIG_SYSCTL */
 
 /*
@@ -151,7 +158,6 @@ static size_t fanotify_event_len(unsigned int info_mode,
 				 struct fanotify_event *event)
 {
 	size_t event_len = FAN_EVENT_METADATA_LEN;
-	struct fanotify_info *info;
 	int fh_len;
 	int dot_len = 0;
 
@@ -161,8 +167,6 @@ static size_t fanotify_event_len(unsigned int info_mode,
 	if (fanotify_is_error_event(event->mask))
 		event_len += FANOTIFY_ERROR_INFO_LEN;
 
-	info = fanotify_event_info(event);
-
 	if (fanotify_event_has_any_dir_fh(event)) {
 		event_len += fanotify_dir_name_info_len(event);
 	} else if ((info_mode & FAN_REPORT_NAME) &&
@@ -697,9 +701,6 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 	if (fanotify_is_perm_event(event->mask))
 		FANOTIFY_PERM(event)->fd = fd;
 
-	if (f)
-		fd_install(fd, f);
-
 	if (info_mode) {
 		ret = copy_info_records_to_user(event, info, info_mode, pidfd,
 						buf, count);
@@ -707,6 +708,9 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 			goto out_close_fd;
 	}
 
+	if (f)
+		fd_install(fd, f);
+
 	return metadata.event_len;
 
out_close_fd:
@@ -1743,6 +1747,7 @@ static int __init fanotify_user_setup(void)
 	init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] =
 		FANOTIFY_DEFAULT_MAX_GROUPS;
 	init_user_ns.ucount_max[UCOUNT_FANOTIFY_MARKS] = max_marks;
+	fanotify_sysctls_init();
 
 	return 0;
 }
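The fanotify hunk above moves fd_install() after copy_info_records_to_user(): once a descriptor is installed it is owned by user space and the kernel may no longer take it back, so it must only be published once every copy-out has succeeded. A hedged kernel-style sketch of that ordering (publish_file_fd is a hypothetical helper, not kernel API; only the fd-reservation calls are real):

    #include <linux/fcntl.h>
    #include <linux/file.h>
    #include <linux/fs.h>
    #include <linux/uaccess.h>

    /* Hand a struct file to user space as a new fd, publishing the fd
     * only after the copy to the user buffer has succeeded. */
    static long publish_file_fd(struct file *f, int __user *ubuf)
    {
    	int fd = get_unused_fd_flags(O_CLOEXEC);

    	if (fd < 0) {
    		fput(f);
    		return fd;
    	}
    	if (copy_to_user(ubuf, &fd, sizeof(fd))) {
    		/* Not yet installed, so nothing user-visible to race with */
    		put_unused_fd(fd);
    		fput(f);
    		return -EFAULT;
    	}
    	fd_install(fd, f);	/* point of no return */
    	return 0;
    }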
inotify_max_queued_events = 16384; init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128; init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max; + inotify_sysctls_init(); return 0; } diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index 8aaec7e0804efaa4b66727bf0d7360f68abba8c0..fb825059d4886783c414d42bc4f46940c0cdb0ba 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -11,7 +11,6 @@ #include #include -#include #include #include #include diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index f89ffcbd585ffa4575fa499a7ad646d91b04943c..a17be1618bf707471e0b8417fc9c931369d041d6 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -379,7 +379,7 @@ static void o2hb_nego_timeout(struct work_struct *work) o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap)); /* lowest node as master node to make negotiate decision. */ - master_node = find_next_bit(live_node_bitmap, O2NM_MAX_NODES, 0); + master_node = find_first_bit(live_node_bitmap, O2NM_MAX_NODES); if (master_node == o2nm_this_node()) { if (!test_bit(master_node, reg->hr_nego_node_bitmap)) { diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 9f90fc9551e1a6d94652179624cd4b6a6ed91b00..c4eccd499db8a3eedc3ce024765325985e4fee00 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1045,7 +1045,7 @@ static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map) int status, ret = 0, i; char *p; - if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) + if (find_first_bit(node_map, O2NM_MAX_NODES) >= O2NM_MAX_NODES) goto bail; qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL); @@ -1217,7 +1217,7 @@ static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map) struct o2nm_node *node; int ret = 0, status, count, i; - if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) + if (find_first_bit(node_map, O2NM_MAX_NODES) >= O2NM_MAX_NODES) goto bail; qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 9b88219febb5264d50677cdc692f69dca090b089..227da5b1b6ab1008a670d0147b0106adb1e5d6a6 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -861,7 +861,7 @@ lookup: * to see if there are any nodes that still need to be * considered. these will not appear in the mle nodemap * but they might own this lockres. wait on them. */ - bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); + bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES); if (bit < O2NM_MAX_NODES) { mlog(0, "%s: res %.*s, At least one node (%d) " "to recover before lock mastery can begin\n", @@ -912,7 +912,7 @@ redo_request: dlm_wait_for_recovery(dlm); spin_lock(&dlm->spinlock); - bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0); + bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES); if (bit < O2NM_MAX_NODES) { mlog(0, "%s: res %.*s, At least one node (%d) " "to recover before lock mastery can begin\n", @@ -1079,7 +1079,7 @@ recheck: sleep = 1; /* have all nodes responded? */ if (voting_done && !*blocked) { - bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0); + bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES); if (dlm->node_num <= bit) { /* my node number is lowest. * now tell other nodes that I am @@ -1234,8 +1234,8 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, } else { mlog(ML_ERROR, "node down! 
%d\n", node); if (blocked) { - int lowest = find_next_bit(mle->maybe_map, - O2NM_MAX_NODES, 0); + int lowest = find_first_bit(mle->maybe_map, + O2NM_MAX_NODES); /* act like it was never there */ clear_bit(node, mle->maybe_map); @@ -1795,7 +1795,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data, "MLE for it! (%.*s)\n", assert->node_idx, namelen, name); } else { - int bit = find_next_bit (mle->maybe_map, O2NM_MAX_NODES, 0); + int bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES); if (bit >= O2NM_MAX_NODES) { /* not necessarily an error, though less likely. * could be master just re-asserting. */ @@ -2521,7 +2521,7 @@ static int dlm_is_lockres_migratable(struct dlm_ctxt *dlm, } if (!nonlocal) { - node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); + node_ref = find_first_bit(res->refmap, O2NM_MAX_NODES); if (node_ref >= O2NM_MAX_NODES) return 0; } @@ -3303,7 +3303,7 @@ static void dlm_clean_block_mle(struct dlm_ctxt *dlm, BUG_ON(mle->type != DLM_MLE_BLOCK); spin_lock(&mle->spinlock); - bit = find_next_bit(mle->maybe_map, O2NM_MAX_NODES, 0); + bit = find_first_bit(mle->maybe_map, O2NM_MAX_NODES); if (bit != dead_node) { mlog(0, "mle found, but dead node %u would not have been " "master\n", dead_node); @@ -3542,7 +3542,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm) spin_lock(&dlm->master_lock); BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING); - BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES)); + BUG_ON((find_first_bit(dlm->domain_map, O2NM_MAX_NODES) < O2NM_MAX_NODES)); for (i = 0; i < DLM_HASH_BUCKETS; i++) { bucket = dlm_master_hash(dlm, i); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 5cd5f7511dacd8908167da64092327d03551cbe3..52ad342fec3ef20ddae71346f813fecdd0da3dc0 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -451,7 +451,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) { int bit; - bit = find_next_bit (dlm->recovery_map, O2NM_MAX_NODES, 0); + bit = find_first_bit(dlm->recovery_map, O2NM_MAX_NODES); if (bit >= O2NM_MAX_NODES || bit < 0) dlm_set_reco_dead_node(dlm, O2NM_INVALID_NODE_NUM); else diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index c350bd4df7701df6f451d7716ae99932e6d5b654..eedf07ca23ca4181e9fa7e85bd182232a7f0d4aa 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -92,7 +92,7 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res) return 0; /* Another node has this resource with this node as the master */ - bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); + bit = find_first_bit(res->refmap, O2NM_MAX_NODES); if (bit < O2NM_MAX_NODES) return 0; diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 16f1bfc407f2a18124fbd3288c8518f24931d90e..dd77b7aaabf5c942ce3c43a2cac9e21c1f83aeeb 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -661,42 +661,8 @@ static struct ctl_table ocfs2_nm_table[] = { { } }; -static struct ctl_table ocfs2_mod_table[] = { - { - .procname = "nm", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = ocfs2_nm_table - }, - { } -}; - -static struct ctl_table ocfs2_kern_table[] = { - { - .procname = "ocfs2", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = ocfs2_mod_table - }, - { } -}; - -static struct ctl_table ocfs2_root_table[] = { - { - .procname = "fs", - .data = NULL, - .maxlen = 0, - .mode = 0555, - .child = ocfs2_kern_table - }, - { } -}; - static struct ctl_table_header *ocfs2_table_header; - /* * 
Initialization */ @@ -705,7 +671,7 @@ static int __init ocfs2_stack_glue_init(void) { strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); - ocfs2_table_header = register_sysctl_table(ocfs2_root_table); + ocfs2_table_header = register_sysctl("fs/ocfs2/nm", ocfs2_nm_table); if (!ocfs2_table_header) { printk(KERN_ERR "ocfs2 stack glue: unable to register sysctl\n"); diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 481017e1dac5ac64100850f4b8b59f84f2a7c9f6..166c8918c825a60b755816c384ec22bbed3d1b70 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1251,26 +1251,23 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, { struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; struct journal_head *jh; - int ret = 1; + int ret; if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) return 0; - if (!buffer_jbd(bg_bh)) + jh = jbd2_journal_grab_journal_head(bg_bh); + if (!jh) return 1; - jbd_lock_bh_journal_head(bg_bh); - if (buffer_jbd(bg_bh)) { - jh = bh2jh(bg_bh); - spin_lock(&jh->b_state_lock); - bg = (struct ocfs2_group_desc *) jh->b_committed_data; - if (bg) - ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); - else - ret = 1; - spin_unlock(&jh->b_state_lock); - } - jbd_unlock_bh_journal_head(bg_bh); + spin_lock(&jh->b_state_lock); + bg = (struct ocfs2_group_desc *) jh->b_committed_data; + if (bg) + ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); + else + ret = 1; + spin_unlock(&jh->b_state_lock); + jbd2_journal_put_journal_head(jh); return ret; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 1286b88b6fa17917e0f76d351621f933e865bf56..2772dec9dcea4c4871e65c05dd5b68fbf73efcdf 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #define CREATE_TRACE_POINTS @@ -2283,7 +2282,6 @@ static int ocfs2_initialize_super(struct super_block *sb, mlog_errno(status); goto bail; } - cleancache_init_shared_fs(sb); osb->ocfs2_wq = alloc_ordered_workqueue("ocfs2_wq", WQ_MEM_RECLAIM); if (!osb->ocfs2_wq) { diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index b193d08a3dc367e23213d22eedd5dcbcf8a09d67..e040970408d4ff2b57c3e64518961eacbf66113a 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -145,7 +145,7 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old, if (err == -ENOTTY || err == -EINVAL) return 0; pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n", - old, err); + old->dentry, err); return err; } @@ -157,7 +157,9 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old, */ if (oldfa.flags & OVL_PROT_FS_FLAGS_MASK) { err = ovl_set_protattr(inode, new->dentry, &oldfa); - if (err) + if (err == -EPERM) + pr_warn_once("copying fileattr: no xattr on upper\n"); + else if (err) return err; } @@ -167,8 +169,16 @@ static int ovl_copy_fileattr(struct inode *inode, struct path *old, err = ovl_real_fileattr_get(new, &newfa); if (err) { + /* + * Returning an error if upper doesn't support fileattr will + * result in a regression, so revert to the old behavior. 
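As a standalone illustration of the error policy adopted above: -ENOTTY and -EINVAL are read as "this filesystem has no fileattr support" and swallowed, while any other failure still aborts the copy-up. A toy sketch, not the overlayfs code itself:

#include <errno.h>
#include <stdio.h>

/* Mirror of the policy: unsupported is not fatal, real errors are. */
static int copy_fileattr_result(int err)
{
	if (err == -ENOTTY || err == -EINVAL) {
		fprintf(stderr, "fileattr unsupported, continuing\n");
		return 0;
	}
	return err;
}

int main(void)
{
	printf("ENOTTY -> %d\n", copy_fileattr_result(-ENOTTY));
	printf("EIO    -> %d\n", copy_fileattr_result(-EIO));
	return 0;
}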
+ */ + if (err == -ENOTTY || err == -EINVAL) { + pr_warn_once("copying fileattr: no support on upper\n"); + return 0; + } pr_warn("failed to retrieve upper fileattr (%pd2, err=%i)\n", - new, err); + new->dentry, err); return err; } diff --git a/fs/pipe.c b/fs/pipe.c index 6d4342bad9f15b258134fec2374955588f901de4..cc28623a67b61422472d05b9b040e88e640c99d8 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -50,13 +51,13 @@ * The max size that a non-root user is allowed to grow the pipe. Can * be set by root in /proc/sys/fs/pipe-max-size */ -unsigned int pipe_max_size = 1048576; +static unsigned int pipe_max_size = 1048576; /* Maximum allocatable pages per user. Hard limit is unset by default, soft * matches default values. */ -unsigned long pipe_user_pages_hard; -unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR; +static unsigned long pipe_user_pages_hard; +static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR; /* * We use head and tail indices that aren't masked off, except at the point of @@ -1428,6 +1429,60 @@ static struct file_system_type pipe_fs_type = { .kill_sb = kill_anon_super, }; +#ifdef CONFIG_SYSCTL +static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, + unsigned int *valp, + int write, void *data) +{ + if (write) { + unsigned int val; + + val = round_pipe_size(*lvalp); + if (val == 0) + return -EINVAL; + + *valp = val; + } else { + unsigned int val = *valp; + *lvalp = (unsigned long) val; + } + + return 0; +} + +static int proc_dopipe_max_size(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + return do_proc_douintvec(table, write, buffer, lenp, ppos, + do_proc_dopipe_max_size_conv, NULL); +} + +static struct ctl_table fs_pipe_sysctls[] = { + { + .procname = "pipe-max-size", + .data = &pipe_max_size, + .maxlen = sizeof(pipe_max_size), + .mode = 0644, + .proc_handler = proc_dopipe_max_size, + }, + { + .procname = "pipe-user-pages-hard", + .data = &pipe_user_pages_hard, + .maxlen = sizeof(pipe_user_pages_hard), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, + { + .procname = "pipe-user-pages-soft", + .data = &pipe_user_pages_soft, + .maxlen = sizeof(pipe_user_pages_soft), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, + { } +}; +#endif + static int __init init_pipe_fs(void) { int err = register_filesystem(&pipe_fs_type); @@ -1439,6 +1494,9 @@ static int __init init_pipe_fs(void) unregister_filesystem(&pipe_fs_type); } } +#ifdef CONFIG_SYSCTL + register_sysctl_init("fs", fs_pipe_sysctls); +#endif return err; } diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 5b78739e60e406c75abfc7677f41ea5c5806b554..f2132407e1335755bfe85fec2a41995bd36d2620 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -791,12 +791,6 @@ void proc_remove(struct proc_dir_entry *de) } EXPORT_SYMBOL(proc_remove); -void *PDE_DATA(const struct inode *inode) -{ - return __PDE_DATA(inode); -} -EXPORT_SYMBOL(PDE_DATA); - /* * Pull a user buffer into memory and pass it to the file's write handler if * one is supplied. 
The ->write() method is permitted to modify the diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 599eb724ff2d57d3c2f315952a28cbc8eda804a3..f84355c5a36d3482fcf29d1206a4d725ef67504b 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -650,6 +650,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de) return NULL; } + inode->i_private = de->data; inode->i_ino = de->low_ino; inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); PROC_I(inode)->pde = de; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 03415f3fb3a8184742d73584c7414490a273af25..06a80f78433d8b40f48aa9419f3b51c9e8f77f65 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -115,11 +115,6 @@ static inline struct proc_dir_entry *PDE(const struct inode *inode) return PROC_I(inode)->pde; } -static inline void *__PDE_DATA(const struct inode *inode) -{ - return PDE(inode)->data; -} - static inline struct pid *proc_pid(const struct inode *inode) { return PROC_I(inode)->pid; diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 39b823ab2564edf62fd08983ec44560b1120ee24..e1cfeda397f3fb88004854cd15e4087b48e7a54d 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -138,7 +138,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_data); * @parent: The parent directory in which to create. * @ops: The seq_file ops with which to read the file. * @write: The write method with which to 'modify' the file. - * @data: Data for retrieval by PDE_DATA(). + * @data: Data for retrieval by pde_data(). * * Create a network namespaced proc file in the @parent directory with the * specified @name and @mode that allows reading of a file that displays a @@ -153,7 +153,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_data); * modified by the @write function. @write should return 0 on success. * * The @data value is accessible from the @show and @write functions by calling - * PDE_DATA() on the file inode. The network namespace must be accessed by + * pde_data() on the file inode. The network namespace must be accessed by * calling seq_file_net() on the seq_file struct. */ struct proc_dir_entry *proc_create_net_data_write(const char *name, umode_t mode, @@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_single); * @parent: The parent directory in which to create. * @show: The seqfile show method with which to read the file. * @write: The write method with which to 'modify' the file. - * @data: Data for retrieval by PDE_DATA(). + * @data: Data for retrieval by pde_data(). * * Create a network-namespaced proc file in the @parent directory with the * specified @name and @mode that allows reading of a file that displays a @@ -245,7 +245,7 @@ EXPORT_SYMBOL_GPL(proc_create_net_single); * modified by the @write function. @write should return 0 on success. * * The @data value is accessible from the @show and @write functions by calling - * PDE_DATA() on the file inode. The network namespace must be accessed by + * pde_data() on the file inode. The network namespace must be accessed by * calling seq_file_single_net() on the seq_file struct. 
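The @data round-trip that pde_data() provides can be pictured with a toy userspace registry: the pointer handed in at creation time is stashed on the entry and recovered inside the handler, much as proc_get_inode() now stashes de->data in inode->i_private. The struct below is illustrative only, not the proc API:

#include <stdio.h>

struct toy_entry {
	const char *name;
	void (*show)(struct toy_entry *e);
	void *data;			/* plays the role of inode->i_private */
};

static void show_counter(struct toy_entry *e)
{
	/* the handler recovers its private data, pde_data()-style */
	printf("%s: %d\n", e->name, *(int *)e->data);
}

int main(void)
{
	int counter = 42;
	struct toy_entry e = { "demo", show_counter, &counter };

	e.show(&e);
	return 0;
}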
 */ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mode, diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 389e1e42e7d9a0d650e31e80bc6173e3041f2f15..7d9cfc730bd4c345ccbd34f65e19c31c51fa2536 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; @@ -25,15 +26,32 @@ static const struct file_operations proc_sys_dir_file_operations; static const struct inode_operations proc_sys_dir_operations; /* shared constants to be used in various sysctls */ -const int sysctl_vals[] = { 0, 1, INT_MAX }; +const int sysctl_vals[] = { -1, 0, 1, 2, 4, 100, 200, 1000, 3000, INT_MAX, 65535 }; EXPORT_SYMBOL(sysctl_vals); +const unsigned long sysctl_long_vals[] = { 0, 1, LONG_MAX }; +EXPORT_SYMBOL_GPL(sysctl_long_vals); + /* Support for permanently empty directories */ struct ctl_table sysctl_mount_point[] = { { } }; +/** + * register_sysctl_mount_point() - registers a sysctl mount point + * @path: path for the mount point + * + * Used to create a permanently empty directory to serve as mount point. + * There are some subtle but important permission checks this allows in the + * case of unprivileged mounts. + */ +struct ctl_table_header *register_sysctl_mount_point(const char *path) +{ + return register_sysctl(path, sysctl_mount_point); +} +EXPORT_SYMBOL(register_sysctl_mount_point); + static bool is_empty_dir(struct ctl_table_header *head) { return head->ctl_table[0].child == sysctl_mount_point; @@ -1383,6 +1401,38 @@ struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *tab } EXPORT_SYMBOL(register_sysctl); +/** + * __register_sysctl_init() - register sysctl table to path + * @path: path name for sysctl base + * @table: This is the sysctl table that needs to be registered to the path + * @table_name: The name of sysctl table, only used for log printing when + * registration fails + * + * The sysctl interface is used by userspace to query or modify at runtime + * a predefined value set on a variable. These variables however have default + * values pre-set. Code which depends on these variables will always work even + * if register_sysctl() fails. If register_sysctl() fails you'd just lose the + * ability to query or modify the sysctls dynamically at run time. Chances of + * register_sysctl() failing on init are extremely low, and so for both reasons + * this function does not return any error as it is used by initialization code. + * + * Context: Can only be called after your respective sysctl base path has been + * registered. So for instance, most base directories are registered early on + * init before init levels are processed through proc_sys_init() and + * sysctl_init_bases(). 
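One way to see why a failed registration is tolerable from userspace: a consumer of a sysctl that never appeared simply keeps the compiled-in default. The path below assumes the fs/pipe.c sysctl registered elsewhere in this series, and the fallback mirrors its 1048576 default:

#include <stdio.h>

static long read_sysctl_or_default(const char *path, long def)
{
	FILE *f = fopen(path, "r");
	long val = def;

	/* missing file == registration never happened; keep the default */
	if (f) {
		if (fscanf(f, "%ld", &val) != 1)
			val = def;
		fclose(f);
	}
	return val;
}

int main(void)
{
	long v = read_sysctl_or_default("/proc/sys/fs/pipe-max-size", 1048576);

	printf("effective pipe-max-size: %ld\n", v);
	return 0;
}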
+ */ +void __init __register_sysctl_init(const char *path, struct ctl_table *table, + const char *table_name) +{ + struct ctl_table_header *hdr = register_sysctl(path, table); + + if (unlikely(!hdr)) { + pr_err("failed when register_sysctl %s to %s\n", table_name, path); + return; + } + kmemleak_not_leak(hdr); +} + static char *append_path(const char *path, char *pos, const char *name) { int namelen; @@ -1596,6 +1646,15 @@ struct ctl_table_header *register_sysctl_table(struct ctl_table *table) } EXPORT_SYMBOL(register_sysctl_table); +int __register_sysctl_base(struct ctl_table *base_table) +{ + struct ctl_table_header *hdr; + + hdr = register_sysctl_table(base_table); + kmemleak_not_leak(hdr); + return 0; +} + static void put_links(struct ctl_table_header *header) { struct ctl_table_set *root_set = &sysctl_table_root.default_set; @@ -1709,7 +1768,7 @@ int __init proc_sys_init(void) proc_sys_root->proc_dir_ops = &proc_sys_dir_file_operations; proc_sys_root->nlink = 0; - return sysctl_init(); + return sysctl_init_bases(); } struct sysctl_alias { diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 18f8c3acbb85e8a90bf9a5395042bf5975987ae9..6e97ed775074c893d323afd054e1fd62f019aa84 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -440,7 +440,8 @@ static void smaps_page_accumulate(struct mem_size_stats *mss, } static void smaps_account(struct mem_size_stats *mss, struct page *page, - bool compound, bool young, bool dirty, bool locked) + bool compound, bool young, bool dirty, bool locked, + bool migration) { int i, nr = compound ? compound_nr(page) : 1; unsigned long size = nr * PAGE_SIZE; @@ -467,8 +468,15 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, * page_count(page) == 1 guarantees the page is mapped exactly once. * If any subpage of the compound page mapped with PTE it would elevate * page_count(). + * + * The page_mapcount() is called to get a snapshot of the mapcount. + * Without holding the page lock this snapshot can be slightly wrong as + * we cannot always read the mapcount atomically. It is not safe to + * call page_mapcount() even with PTL held if the page is not mapped, + * especially for migration entries. Treat regular migration entries + * as mapcount == 1. 
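A toy model of the accounting decision above; refcount and mapcount here are stand-ins for page_count()/page_mapcount(), and the rule shown is only the branch condition, not the kernel's accounting itself:

#include <stdbool.h>
#include <stdio.h>

struct fake_page {
	int refcount;	/* stands in for page_count() */
	int mapcount;	/* stands in for page_mapcount(), untrustworthy here */
};

/* Exclusive accounting: mapped exactly once, or seen via a migration entry. */
static bool account_as_exclusive(const struct fake_page *page, bool migration)
{
	return page->refcount == 1 || migration;
}

int main(void)
{
	struct fake_page shared = { .refcount = 3, .mapcount = 2 };
	struct fake_page migrating = { .refcount = 2, .mapcount = 0 };

	printf("shared page exclusive?     %d\n", account_as_exclusive(&shared, false));
	printf("migration entry exclusive? %d\n", account_as_exclusive(&migrating, true));
	return 0;
}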
*/ - if (page_count(page) == 1) { + if ((page_count(page) == 1) || migration) { smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty, locked, true); return; @@ -517,6 +525,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; + bool migration = false; if (pte_present(*pte)) { page = vm_normal_page(vma, addr, *pte); @@ -536,8 +545,11 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, } else { mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT; } - } else if (is_pfn_swap_entry(swpent)) + } else if (is_pfn_swap_entry(swpent)) { + if (is_migration_entry(swpent)) + migration = true; page = pfn_swap_entry_to_page(swpent); + } } else { smaps_pte_hole_lookup(addr, walk); return; @@ -546,7 +558,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, if (!page) return; - smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked); + smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), + locked, migration); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -557,6 +570,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; + bool migration = false; if (pmd_present(*pmd)) { /* FOLL_DUMP will return -EFAULT on huge zero page */ @@ -564,8 +578,10 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { swp_entry_t entry = pmd_to_swp_entry(*pmd); - if (is_migration_entry(entry)) + if (is_migration_entry(entry)) { + migration = true; page = pfn_swap_entry_to_page(entry); + } } if (IS_ERR_OR_NULL(page)) return; @@ -577,7 +593,9 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, /* pass */; else mss->file_thp += HPAGE_PMD_SIZE; - smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked); + + smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), + locked, migration); } #else static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, @@ -1378,6 +1396,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, { u64 frame = 0, flags = 0; struct page *page = NULL; + bool migration = false; if (pte_present(pte)) { if (pm->show_pfn) @@ -1399,13 +1418,14 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, frame = swp_type(entry) | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); flags |= PM_SWAP; + migration = is_migration_entry(entry); if (is_pfn_swap_entry(entry)) page = pfn_swap_entry_to_page(entry); } if (page && !PageAnon(page)) flags |= PM_FILE; - if (page && page_mapcount(page) == 1) + if (page && !migration && page_mapcount(page) == 1) flags |= PM_MMAP_EXCLUSIVE; if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; @@ -1421,8 +1441,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, spinlock_t *ptl; pte_t *pte, *orig_pte; int err = 0; - #ifdef CONFIG_TRANSPARENT_HUGEPAGE + bool migration = false; + ptl = pmd_trans_huge_lock(pmdp, vma); if (ptl) { u64 flags = 0, frame = 0; @@ -1461,11 +1482,12 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, if (pmd_swp_uffd_wp(pmd)) flags |= PM_UFFD_WP; VM_BUG_ON(!is_pmd_migration_entry(pmd)); + migration = is_migration_entry(entry); page = pfn_swap_entry_to_page(entry); } #endif - if (page && page_mapcount(page) == 1) + if (page && !migration && page_mapcount(page) == 1) flags |= 
PM_MMAP_EXCLUSIVE; for (; addr != end; addr += PAGE_SIZE) { diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 22d904bde6ab9158702e4d4d83a42ebb4056770d..a74aef99bd3d6105b07da8ee90f07f1ff15d0c69 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -690,9 +690,14 @@ int dquot_quota_sync(struct super_block *sb, int type) /* This is not very clever (and fast) but currently I don't know about * any other simple way of getting quota data to disk and we must get * them there for userspace to be visible... */ - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); + if (sb->s_op->sync_fs) { + ret = sb->s_op->sync_fs(sb, 1); + if (ret) + return ret; + } + ret = sync_blockdev(sb->s_bdev); + if (ret) + return ret; /* * Now when everything is written we can discard the pagecache so diff --git a/fs/smbfs_common/smb2pdu.h b/fs/smbfs_common/smb2pdu.h index 7ccadcbe684b99bd7c99557c2151beeb3d230742..38b8fc51486049d4de706f9fcdec3ef1d0bdaf6d 100644 --- a/fs/smbfs_common/smb2pdu.h +++ b/fs/smbfs_common/smb2pdu.h @@ -449,7 +449,7 @@ struct smb2_netname_neg_context { */ /* Flags */ -#define SMB2_ACCEPT_TRANSFORM_LEVEL_SECURITY 0x00000001 +#define SMB2_ACCEPT_TRANSPORT_LEVEL_SECURITY 0x00000001 struct smb2_transport_capabilities_context { __le16 ContextType; /* 6 */ diff --git a/fs/smbfs_common/smbfsctl.h b/fs/smbfs_common/smbfsctl.h index 926f87cd6af025f7bebadc47e3a568b3f36acb4b..d51939c43ad7f5e70a60bbb47e1570bad14b283d 100644 --- a/fs/smbfs_common/smbfsctl.h +++ b/fs/smbfs_common/smbfsctl.h @@ -95,8 +95,10 @@ #define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */ #define FSCTL_GET_INTEGRITY_INFORMATION 0x0009027C #define FSCTL_GET_REFS_VOLUME_DATA 0x000902D8 /* See MS-FSCC 2.3.24 */ +#define FSCTL_SET_INTEGRITY_INFORMATION_EXT 0x00090380 #define FSCTL_GET_RETRIEVAL_POINTERS_AND_REFCOUNT 0x000903d3 #define FSCTL_GET_RETRIEVAL_POINTER_COUNT 0x0009042b +#define FSCTL_REFS_STREAM_SNAPSHOT_MANAGEMENT 0x00090440 #define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF #define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */ #define FSCTL_FILE_LEVEL_TRIM 0x00098208 /* BB add struct */ diff --git a/fs/super.c b/fs/super.c index a6405d44d4ca2e210f6277728195dfe1c7530142..f1d4a193602d673b8aaf435d9d2d1c25388221b1 100644 --- a/fs/super.c +++ b/fs/super.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -260,7 +259,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags, s->s_time_gran = 1000000000; s->s_time_min = TIME64_MIN; s->s_time_max = TIME64_MAX; - s->cleancache_poolid = CLEANCACHE_NO_POOL; s->s_shrink.seeks = DEFAULT_SEEKS; s->s_shrink.scan_objects = super_cache_scan; @@ -330,7 +328,6 @@ void deactivate_locked_super(struct super_block *s) { struct file_system_type *fs = s->s_type; if (atomic_dec_and_test(&s->s_active)) { - cleancache_invalidate_fs(s); unregister_shrinker(&s->s_shrink); fs->kill_sb(s); @@ -1619,11 +1616,9 @@ static void lockdep_sb_freeze_acquire(struct super_block *sb) percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_); } -static void sb_freeze_unlock(struct super_block *sb) +static void sb_freeze_unlock(struct super_block *sb, int level) { - int level; - - for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--) + for (level--; level >= 0; level--) percpu_up_write(sb->s_writers.rw_sem + level); } @@ -1694,7 +1689,14 @@ int freeze_super(struct super_block *sb) sb_wait_write(sb, SB_FREEZE_PAGEFAULT); /* All writers are done so after syncing there won't be dirty data */ - 
sync_filesystem(sb); + ret = sync_filesystem(sb); + if (ret) { + sb->s_writers.frozen = SB_UNFROZEN; + sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT); + wake_up(&sb->s_writers.wait_unfrozen); + deactivate_locked_super(sb); + return ret; + } /* Now wait for internal filesystem counter */ sb->s_writers.frozen = SB_FREEZE_FS; @@ -1706,7 +1708,7 @@ int freeze_super(struct super_block *sb) printk(KERN_ERR "VFS:Filesystem freeze failed\n"); sb->s_writers.frozen = SB_UNFROZEN; - sb_freeze_unlock(sb); + sb_freeze_unlock(sb, SB_FREEZE_FS); wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return ret; @@ -1751,7 +1753,7 @@ static int thaw_super_locked(struct super_block *sb) } sb->s_writers.frozen = SB_UNFROZEN; - sb_freeze_unlock(sb); + sb_freeze_unlock(sb, SB_FREEZE_FS); out: wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); diff --git a/fs/sync.c b/fs/sync.c index 3ce8e2137f3109e36ec29895f73c9749893f70d6..c7690016453e460c4b295df9f5c15f7b7f0bf756 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -29,7 +29,7 @@ */ int sync_filesystem(struct super_block *sb) { - int ret; + int ret = 0; /* * We need to be protected against the filesystem going from @@ -52,15 +52,21 @@ int sync_filesystem(struct super_block *sb) * at a time. */ writeback_inodes_sb(sb, WB_REASON_SYNC); - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 0); + if (sb->s_op->sync_fs) { + ret = sb->s_op->sync_fs(sb, 0); + if (ret) + return ret; + } ret = sync_blockdev_nowait(sb->s_bdev); - if (ret < 0) + if (ret) return ret; sync_inodes_sb(sb); - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); + if (sb->s_op->sync_fs) { + ret = sb->s_op->sync_fs(sb, 1); + if (ret) + return ret; + } return sync_blockdev(sb->s_bdev); } EXPORT_SYMBOL(sync_filesystem); diff --git a/fs/sysctls.c b/fs/sysctls.c new file mode 100644 index 0000000000000000000000000000000000000000..c701273c9432bcf7000c80ca7af2bf4ef92c5303 --- /dev/null +++ b/fs/sysctls.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * /proc/sys/fs shared sysctls + * + * These sysctls are shared between different filesystems. 
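The two shared sysctls created below can be inspected from userspace on any Linux system with /proc mounted; per the table's extra1/extra2 bounds, both values must fall within 0..65535 (SYSCTL_MAXOLDUID):

#include <stdio.h>

static void show(const char *path)
{
	FILE *f = fopen(path, "r");
	int val;

	if (f && fscanf(f, "%d", &val) == 1)
		printf("%s = %d\n", path, val);
	else
		printf("%s not available\n", path);
	if (f)
		fclose(f);
}

int main(void)
{
	show("/proc/sys/fs/overflowuid");
	show("/proc/sys/fs/overflowgid");
	return 0;
}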
+ */ +#include +#include + +static struct ctl_table fs_shared_sysctls[] = { + { + .procname = "overflowuid", + .data = &fs_overflowuid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_MAXOLDUID, + }, + { + .procname = "overflowgid", + .data = &fs_overflowgid, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_MAXOLDUID, + }, + { } +}; + +DECLARE_SYSCTL_BASE(fs, fs_shared_sysctls); + +static int __init init_fs_sysctls(void) +{ + return register_sysctl_base(fs); +} + +early_initcall(init_fs_sysctls); diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 1d6b7a50736bac681f03dc8af2ca7f8bb9f26681..ea8f6cd01f501696cd276e58b8229af677283796 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -258,10 +258,6 @@ int udf_expand_file_adinicb(struct inode *inode) char *kaddr; struct udf_inode_info *iinfo = UDF_I(inode); int err; - struct writeback_control udf_wbc = { - .sync_mode = WB_SYNC_NONE, - .nr_to_write = 1, - }; WARN_ON_ONCE(!inode_is_locked(inode)); if (!iinfo->i_lenAlloc) { @@ -305,8 +301,10 @@ int udf_expand_file_adinicb(struct inode *inode) iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; /* from now on we have normal address_space methods */ inode->i_data.a_ops = &udf_aops; + set_page_dirty(page); + unlock_page(page); up_write(&iinfo->i_data_sem); - err = inode->i_data.a_ops->writepage(page, &udf_wbc); + err = filemap_fdatawrite(inode->i_mapping); if (err) { /* Restore everything back so that we don't lose data... */ lock_page(page); @@ -317,6 +315,7 @@ int udf_expand_file_adinicb(struct inode *inode) unlock_page(page); iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; inode->i_data.a_ops = &udf_adinicb_aops; + iinfo->i_lenAlloc = inode->i_size; up_write(&iinfo->i_data_sem); } put_page(page); diff --git a/fs/unicode/Kconfig b/fs/unicode/Kconfig index 610d7bc05d6e3318573dc4fa82e787852b0ff327..da786a687fdc70967d1be8e9cfb85b2a6bcf5bef 100644 --- a/fs/unicode/Kconfig +++ b/fs/unicode/Kconfig @@ -3,21 +3,13 @@ # UTF-8 normalization # config UNICODE - bool "UTF-8 normalization and casefolding support" + tristate "UTF-8 normalization and casefolding support" help Say Y here to enable UTF-8 NFD normalization and NFD+CF casefolding - support. - -config UNICODE_UTF8_DATA - tristate "UTF-8 normalization and casefolding tables" - depends on UNICODE - default UNICODE - help - This contains a large table of case foldings, which can be loaded as - a separate module if you say M here. To be on the safe side stick - to the default of Y. Saying N here makes no sense, if you do not want - utf8 casefolding support, disable CONFIG_UNICODE instead. + support. If you say M here, the large table of case foldings will + be a separate loadable module that gets requested only when a file + system actually uses it. 
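A loose userspace analogy for the load-on-first-use behaviour the help text describes: a large, rarely needed table lives in a separate object that is pulled in only when something actually asks for it. The library and symbol names are hypothetical; build with -ldl:

#include <dlfcn.h>
#include <stdio.h>

int main(void)
{
	/* nothing is paid until a caller actually needs the table */
	void *handle = dlopen("libutf8data.so", RTLD_LAZY);

	if (!handle) {
		fprintf(stderr, "table not loaded: %s\n", dlerror());
		return 0;
	}

	void *sym = dlsym(handle, "utf8_lookup");	/* hypothetical symbol */
	printf("utf8_lookup at %p\n", sym);
	dlclose(handle);
	return 0;
}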
config UNICODE_NORMALIZATION_SELFTEST tristate "Test UTF-8 normalization support" - depends on UNICODE_UTF8_DATA + depends on UNICODE diff --git a/fs/unicode/Makefile b/fs/unicode/Makefile index 2f9d9188852b5118da0dac2d034b5a054b1de6c3..0cc87423de828082c2320fe5e5e7d54969258a2b 100644 --- a/fs/unicode/Makefile +++ b/fs/unicode/Makefile @@ -1,8 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_UNICODE) += unicode.o +ifneq ($(CONFIG_UNICODE),) +obj-y += unicode.o +endif +obj-$(CONFIG_UNICODE) += utf8data.o obj-$(CONFIG_UNICODE_NORMALIZATION_SELFTEST) += utf8-selftest.o -obj-$(CONFIG_UNICODE_UTF8_DATA) += utf8data.o unicode-y := utf8-norm.o utf8-core.o diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 2705f91bdd0d7cdf1508ab98bf934e0e854b8e40..9d6a67c7d2271a49cb0783101e5ef643babffc56 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -136,7 +136,20 @@ done: memalloc_nofs_restore(nofs_flag); } -/* Finish all pending io completions. */ +/* + * Finish all pending IO completions that require transactional modifications. + * + * We try to merge physically and logically contiguous ioends before completion to + * minimise the number of transactions we need to perform during IO completion. + * Both unwritten extent conversion and COW remapping need to iterate and modify + * one physical extent at a time, so we gain nothing by merging physically + * discontiguous extents here. + * + * The ioend chain that we can be processing here is largely unbounded in + * length and we may have to perform significant amounts of work on each ioend + * to complete it. Hence we have to be careful about holding the CPU for too + * long in this loop. + */ void xfs_end_io( struct work_struct *work) @@ -157,6 +170,7 @@ xfs_end_io( list_del_init(&ioend->io_list); iomap_ioend_try_merge(ioend, &tmp); xfs_end_ioend(ioend); + cond_resched(); } } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index d4a387d3d0cec01eb3d89380674b7a0caaa4b5e7..eb2e387ba5287d953c7bfdbf68f245c1f323c6d6 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -850,9 +850,6 @@ xfs_alloc_file_space( rblocks = 0; } - /* - * Allocate and setup the transaction. 
- */ error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, dblocks, rblocks, false, &tp); if (error) @@ -869,9 +866,9 @@ xfs_alloc_file_space( if (error) goto error; - /* - * Complete the transaction - */ + ip->i_diflags |= XFS_DIFLAG_PREALLOC; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 22ad207bedf4e66c695dfe9ff2d557dd45cc1189..5bddb1e9e0b3eea622efa37e58d15cd69f515bc2 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -66,40 +66,6 @@ xfs_is_falloc_aligned( return !((pos | len) & mask); } -int -xfs_update_prealloc_flags( - struct xfs_inode *ip, - enum xfs_prealloc_flags flags) -{ - struct xfs_trans *tp; - int error; - - error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid, - 0, 0, 0, &tp); - if (error) - return error; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - - if (!(flags & XFS_PREALLOC_INVISIBLE)) { - VFS_I(ip)->i_mode &= ~S_ISUID; - if (VFS_I(ip)->i_mode & S_IXGRP) - VFS_I(ip)->i_mode &= ~S_ISGID; - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - } - - if (flags & XFS_PREALLOC_SET) - ip->i_diflags |= XFS_DIFLAG_PREALLOC; - if (flags & XFS_PREALLOC_CLEAR) - ip->i_diflags &= ~XFS_DIFLAG_PREALLOC; - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (flags & XFS_PREALLOC_SYNC) - xfs_trans_set_sync(tp); - return xfs_trans_commit(tp); -} - /* * Fsync operations on directories are much simpler than on regular files, * as there is no file data to flush, and thus also no need for explicit @@ -895,6 +861,21 @@ xfs_break_layouts( return error; } +/* Does this file, inode, or mount want synchronous writes? */ +static inline bool xfs_file_sync_writes(struct file *filp) +{ + struct xfs_inode *ip = XFS_I(file_inode(filp)); + + if (xfs_has_wsync(ip->i_mount)) + return true; + if (filp->f_flags & (__O_SYNC | O_DSYNC)) + return true; + if (IS_SYNC(file_inode(filp))) + return true; + + return false; +} + #define XFS_FALLOC_FL_SUPPORTED \ (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ @@ -910,7 +891,6 @@ xfs_file_fallocate( struct inode *inode = file_inode(file); struct xfs_inode *ip = XFS_I(inode); long error; - enum xfs_prealloc_flags flags = 0; uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; loff_t new_size = 0; bool do_file_insert = false; @@ -955,6 +935,10 @@ xfs_file_fallocate( goto out_unlock; } + error = file_modified(file); + if (error) + goto out_unlock; + if (mode & FALLOC_FL_PUNCH_HOLE) { error = xfs_free_file_space(ip, offset, len); if (error) @@ -1004,8 +988,6 @@ xfs_file_fallocate( } do_file_insert = true; } else { - flags |= XFS_PREALLOC_SET; - if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) { new_size = offset + len; @@ -1057,13 +1039,6 @@ xfs_file_fallocate( } } - if (file->f_flags & O_DSYNC) - flags |= XFS_PREALLOC_SYNC; - - error = xfs_update_prealloc_flags(ip, flags); - if (error) - goto out_unlock; - /* Change file size if needed */ if (new_size) { struct iattr iattr; @@ -1082,8 +1057,14 @@ xfs_file_fallocate( * leave shifted extents past EOF and hence losing access to * the data that is contained within them. 
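The open-flags half of the xfs_file_sync_writes() predicate above can be mirrored in userspace with fcntl(); this sketch demonstrates only the flag test, not the wsync mount option, the IS_SYNC() inode check, or the log force itself:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static bool wants_sync_writes(int fd)
{
	int flags = fcntl(fd, F_GETFL);

	return flags != -1 && (flags & (O_SYNC | O_DSYNC));
}

int main(void)
{
	int fd = open("/tmp/falloc-demo", O_CREAT | O_WRONLY | O_DSYNC, 0600);

	if (fd == -1) {
		perror("open");
		return 1;
	}
	printf("sync writes wanted: %d\n", wants_sync_writes(fd));
	close(fd);
	unlink("/tmp/falloc-demo");
	return 0;
}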
*/ - if (do_file_insert) + if (do_file_insert) { error = xfs_insert_file_space(ip, offset, len); + if (error) + goto out_unlock; + } + + if (xfs_file_sync_writes(file)) + error = xfs_log_force_inode(ip); out_unlock: xfs_iunlock(ip, iolock); @@ -1115,21 +1096,6 @@ xfs_file_fadvise( return ret; } -/* Does this file, inode, or mount want synchronous writes? */ -static inline bool xfs_file_sync_writes(struct file *filp) -{ - struct xfs_inode *ip = XFS_I(file_inode(filp)); - - if (xfs_has_wsync(ip->i_mount)) - return true; - if (filp->f_flags & (__O_SYNC | O_DSYNC)) - return true; - if (IS_SYNC(file_inode(filp))) - return true; - - return false; -} - STATIC loff_t xfs_file_remap_range( struct file *file_in, diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 2e718728986fc376c212428a54eb42753aa6856a..9644f938990c54717f337795755aeab1d7a235e5 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1854,28 +1854,20 @@ xfs_inodegc_worker( } /* - * Force all currently queued inode inactivation work to run immediately, and - * wait for the work to finish. Two pass - queue all the work first pass, wait - * for it in a second pass. + * Force all currently queued inode inactivation work to run immediately and + * wait for the work to finish. */ void xfs_inodegc_flush( struct xfs_mount *mp) { - struct xfs_inodegc *gc; - int cpu; - if (!xfs_is_inodegc_enabled(mp)) return; trace_xfs_inodegc_flush(mp, __return_address); xfs_inodegc_queue_all(mp); - - for_each_online_cpu(cpu) { - gc = per_cpu_ptr(mp->m_inodegc, cpu); - flush_work(&gc->work); - } + flush_workqueue(mp->m_inodegc_wq); } /* @@ -1886,18 +1878,12 @@ void xfs_inodegc_stop( struct xfs_mount *mp) { - struct xfs_inodegc *gc; - int cpu; - if (!xfs_clear_inodegc_enabled(mp)) return; xfs_inodegc_queue_all(mp); + drain_workqueue(mp->m_inodegc_wq); - for_each_online_cpu(cpu) { - gc = per_cpu_ptr(mp->m_inodegc, cpu); - cancel_work_sync(&gc->work); - } trace_xfs_inodegc_stop(mp, __return_address); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index c447bf04205a8eeeba68b279871b5edb240b384e..b7e8f14d9fca05a0855a9f9d870b42b1a82a8428 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -462,15 +462,6 @@ xfs_itruncate_extents( } /* from xfs_file.c */ -enum xfs_prealloc_flags { - XFS_PREALLOC_SET = (1 << 1), - XFS_PREALLOC_CLEAR = (1 << 2), - XFS_PREALLOC_SYNC = (1 << 3), - XFS_PREALLOC_INVISIBLE = (1 << 4), -}; - -int xfs_update_prealloc_flags(struct xfs_inode *ip, - enum xfs_prealloc_flags flags); int xfs_break_layouts(struct inode *inode, uint *iolock, enum layout_break_reason reason); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 03a6198c97f6e601ae45444432a744802edbbfbd..2515fe8299e11485092b511d60725533808e8600 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -1464,7 +1464,7 @@ xfs_ioc_getbmap( if (bmx.bmv_count < 2) return -EINVAL; - if (bmx.bmv_count > ULONG_MAX / recsize) + if (bmx.bmv_count >= INT_MAX / recsize) return -ENOMEM; buf = kvcalloc(bmx.bmv_count, sizeof(*buf), GFP_KERNEL); diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h index fc5a91f3a5e02e019278f3d58501fb12c43e9685..c14852362fceab4fbea30ee44379a5a501afead3 100644 --- a/fs/xfs/xfs_ioctl32.h +++ b/fs/xfs/xfs_ioctl32.h @@ -142,24 +142,6 @@ typedef struct compat_xfs_fsop_attrmulti_handlereq { _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq) #ifdef BROKEN_X86_ALIGNMENT -/* on ia32 l_start is on a 32-bit boundary */ -typedef struct compat_xfs_flock64 { - __s16 l_type; - __s16 l_whence; - __s64 l_start __attribute__((packed)); - 
/* len == 0 means until end of file */ - __s64 l_len __attribute__((packed)); - __s32 l_sysid; - __u32 l_pid; - __s32 l_pad[4]; /* reserve area */ -} compat_xfs_flock64_t; - -#define XFS_IOC_RESVSP_32 _IOW('X', 40, struct compat_xfs_flock64) -#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) -#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) -#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) -#define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64) - typedef struct compat_xfs_fsop_geom_v1 { __u32 blocksize; /* filesystem (data) block size */ __u32 rtextsize; /* realtime extent size */ diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index d6334abbc0b3e309fca6e2fc4df0f261739edb30..4abe17312c2b27c26cc5876167b4d66b55244723 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -70,6 +70,40 @@ xfs_fs_get_uuid( return 0; } +/* + * We cannot use file based VFS helpers such as file_modified() to update + * inode state as we modify the data/metadata in the inode here. Hence we have + * to open code the timestamp updates and SUID/SGID stripping. We also need + * to set the inode prealloc flag to ensure that the extents we allocate are not + * removed if the inode is reclaimed from memory before xfs_fs_commit_blocks() + * is called from the client to indicate that data has been written and the file + * size can be extended. + */ +static int +xfs_fs_map_update_inode( + struct xfs_inode *ip) +{ + struct xfs_trans *tp; + int error; + + error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid, + 0, 0, 0, &tp); + if (error) + return error; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + + VFS_I(ip)->i_mode &= ~S_ISUID; + if (VFS_I(ip)->i_mode & S_IXGRP) + VFS_I(ip)->i_mode &= ~S_ISGID; + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + ip->i_diflags |= XFS_DIFLAG_PREALLOC; + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + return xfs_trans_commit(tp); +} + /* * Get a layout for the pNFS client. */ @@ -164,10 +198,12 @@ xfs_fs_map_blocks( * that the blocks allocated and handed out to the client are * guaranteed to be present even after a server crash. */ - error = xfs_update_prealloc_flags(ip, - XFS_PREALLOC_SET | XFS_PREALLOC_SYNC); + error = xfs_fs_map_update_inode(ip); + if (!error) + error = xfs_log_force_inode(ip); if (error) goto out_unlock; + } else { xfs_iunlock(ip, lock_flags); } @@ -255,7 +291,7 @@ xfs_fs_commit_blocks( length = end - start; if (!length) continue; - + /* * Make sure reads through the pagecache see the new data. */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index e8f37bdc835484c0817438cf40042b517e8fdc1b..4c0dee78b2f8bbb8a11a827fc3464fad4e3c4e13 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -735,6 +735,7 @@ xfs_fs_sync_fs( int wait) { struct xfs_mount *mp = XFS_M(sb); + int error; trace_xfs_fs_sync_fs(mp, __return_address); @@ -744,7 +745,10 @@ xfs_fs_sync_fs( if (!wait) return 0; - xfs_log_force(mp, XFS_LOG_SYNC); + error = xfs_log_force(mp, XFS_LOG_SYNC); + if (error) + return error; + if (laptop_mode) { /* * The disk must be active because we're syncing. 
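The io_stop_wc change in the barrier.h hunk above turns an object-like macro into a function-like one, and the difference is visible in plain C: call sites are written io_stop_wc();, and only the function-like form expands to a well-formed statement there (the old spelling would have expanded to "do { } while (0)();"):

#include <stdio.h>

#define io_stop_wc() do { } while (0)	/* no-op default, as in the patch */

int main(void)
{
	io_stop_wc();	/* expands to an empty statement and compiles */
	puts("io_stop_wc() call site is well-formed");
	return 0;
}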
*/ #ifndef io_stop_wc -#define io_stop_wc do { } while (0) +#define io_stop_wc() do { } while (0) #endif #endif /* !__ASSEMBLY__ */ diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h index df9b5bc3d2827fa5732fb072a30154a10dce0471..a47b8a71d6fe126464564e05559e4763f0c006e9 100644 --- a/include/asm-generic/bitops.h +++ b/include/asm-generic/bitops.h @@ -20,7 +20,6 @@ #include #include #include -#include #ifndef _LINUX_BITOPS_H #error only can be included directly diff --git a/include/asm-generic/bitops/le.h b/include/asm-generic/bitops/le.h index 5a28629cbf4d4d7082d6cc31e68aca11e0a88bbf..d51beff603755049e5502f39acc1f06da4801584 100644 --- a/include/asm-generic/bitops/le.h +++ b/include/asm-generic/bitops/le.h @@ -2,83 +2,19 @@ #ifndef _ASM_GENERIC_BITOPS_LE_H_ #define _ASM_GENERIC_BITOPS_LE_H_ -#include #include #include -#include #if defined(__LITTLE_ENDIAN) #define BITOP_LE_SWIZZLE 0 -static inline unsigned long find_next_zero_bit_le(const void *addr, - unsigned long size, unsigned long offset) -{ - return find_next_zero_bit(addr, size, offset); -} - -static inline unsigned long find_next_bit_le(const void *addr, - unsigned long size, unsigned long offset) -{ - return find_next_bit(addr, size, offset); -} - -static inline unsigned long find_first_zero_bit_le(const void *addr, - unsigned long size) -{ - return find_first_zero_bit(addr, size); -} - #elif defined(__BIG_ENDIAN) #define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7) -#ifndef find_next_zero_bit_le -static inline -unsigned long find_next_zero_bit_le(const void *addr, unsigned - long size, unsigned long offset) -{ - if (small_const_nbits(size)) { - unsigned long val = *(const unsigned long *)addr; - - if (unlikely(offset >= size)) - return size; - - val = swab(val) | ~GENMASK(size - 1, offset); - return val == ~0UL ? size : ffz(val); - } - - return _find_next_bit(addr, NULL, size, offset, ~0UL, 1); -} -#endif - -#ifndef find_next_bit_le -static inline -unsigned long find_next_bit_le(const void *addr, unsigned - long size, unsigned long offset) -{ - if (small_const_nbits(size)) { - unsigned long val = *(const unsigned long *)addr; - - if (unlikely(offset >= size)) - return size; - - val = swab(val) & GENMASK(size - 1, offset); - return val ? 
__ffs(val) : size; - } - - return _find_next_bit(addr, NULL, size, offset, 0UL, 1); -} #endif -#ifndef find_first_zero_bit_le -#define find_first_zero_bit_le(addr, size) \ - find_next_zero_bit_le((addr), (size), 0) -#endif - -#else -#error "Please fix " -#endif static inline int test_bit_le(int nr, const void *addr) { diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index 02932efad3ab469ff700816c0fea40c17c4f11c1..977bea16cf1bca953582e6dd547e94702548a1ca 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -147,6 +147,15 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #if CONFIG_PGTABLE_LEVELS > 3 +static inline pud_t *__pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + gfp_t gfp = GFP_PGTABLE_USER; + + if (mm == &init_mm) + gfp = GFP_PGTABLE_KERNEL; + return (pud_t *)get_zeroed_page(gfp); +} + #ifndef __HAVE_ARCH_PUD_ALLOC_ONE /** * pud_alloc_one - allocate a page for PUD-level page table @@ -159,20 +168,23 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) */ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { - gfp_t gfp = GFP_PGTABLE_USER; - - if (mm == &init_mm) - gfp = GFP_PGTABLE_KERNEL; - return (pud_t *)get_zeroed_page(gfp); + return __pud_alloc_one(mm, addr); } #endif -static inline void pud_free(struct mm_struct *mm, pud_t *pud) +static inline void __pud_free(struct mm_struct *mm, pud_t *pud) { BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); free_page((unsigned long)pud); } +#ifndef __HAVE_ARCH_PUD_FREE +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + __pud_free(mm, pud); +} +#endif + #endif /* CONFIG_PGTABLE_LEVELS > 3 */ #ifndef __HAVE_ARCH_PGD_FREE diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h index d39cfa0d333e9282533ea8b17100c4e4c11cb3fc..52363eee2b20e6763c6b473a7137793258a8daa2 100644 --- a/include/crypto/internal/blake2s.h +++ b/include/crypto/internal/blake2s.h @@ -24,14 +24,11 @@ static inline void blake2s_set_lastblock(struct blake2s_state *state) state->f[0] = -1; } -typedef void (*blake2s_compress_t)(struct blake2s_state *state, - const u8 *block, size_t nblocks, u32 inc); - /* Helper functions for BLAKE2s shared by the library and shash APIs */ -static inline void __blake2s_update(struct blake2s_state *state, - const u8 *in, size_t inlen, - blake2s_compress_t compress) +static __always_inline void +__blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen, + bool force_generic) { const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; @@ -39,7 +36,12 @@ static inline void __blake2s_update(struct blake2s_state *state, return; if (inlen > fill) { memcpy(state->buf + state->buflen, in, fill); - (*compress)(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); + if (force_generic) + blake2s_compress_generic(state, state->buf, 1, + BLAKE2S_BLOCK_SIZE); + else + blake2s_compress(state, state->buf, 1, + BLAKE2S_BLOCK_SIZE); state->buflen = 0; in += fill; inlen -= fill; @@ -47,7 +49,12 @@ static inline void __blake2s_update(struct blake2s_state *state, if (inlen > BLAKE2S_BLOCK_SIZE) { const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); /* Hash one less (full) block than strictly possible */ - (*compress)(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); + if (force_generic) + blake2s_compress_generic(state, in, nblocks - 1, + BLAKE2S_BLOCK_SIZE); + else + blake2s_compress(state, in, nblocks - 1, + BLAKE2S_BLOCK_SIZE); in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); inlen -= BLAKE2S_BLOCK_SIZE * 
(nblocks - 1); } @@ -55,13 +62,16 @@ static inline void __blake2s_update(struct blake2s_state *state, state->buflen += inlen; } -static inline void __blake2s_final(struct blake2s_state *state, u8 *out, - blake2s_compress_t compress) +static __always_inline void +__blake2s_final(struct blake2s_state *state, u8 *out, bool force_generic) { blake2s_set_lastblock(state); memset(state->buf + state->buflen, 0, BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ - (*compress)(state, state->buf, 1, state->buflen); + if (force_generic) + blake2s_compress_generic(state, state->buf, 1, state->buflen); + else + blake2s_compress(state, state->buf, 1, state->buflen); cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); memcpy(out, state->h, state->outlen); } @@ -99,20 +109,20 @@ static inline int crypto_blake2s_init(struct shash_desc *desc) static inline int crypto_blake2s_update(struct shash_desc *desc, const u8 *in, unsigned int inlen, - blake2s_compress_t compress) + bool force_generic) { struct blake2s_state *state = shash_desc_ctx(desc); - __blake2s_update(state, in, inlen, compress); + __blake2s_update(state, in, inlen, force_generic); return 0; } static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out, - blake2s_compress_t compress) + bool force_generic) { struct blake2s_state *state = shash_desc_ctx(desc); - __blake2s_final(state, out, compress); + __blake2s_final(state, out, force_generic); return 0; } diff --git a/include/dt-bindings/clock/dra7.h b/include/dt-bindings/clock/dra7.h index 7d57063b8a6518a2b51fe979fc969f5d7521c217..29ff6b89584874929137031b2ec28a778b14399b 100644 --- a/include/dt-bindings/clock/dra7.h +++ b/include/dt-bindings/clock/dra7.h @@ -84,17 +84,10 @@ #define DRA7_L3_MAIN_2_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) #define DRA7_L3_INSTR_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) -/* iva clocks */ -#define DRA7_IVA_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) -#define DRA7_SL2IF_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) - /* dss clocks */ #define DRA7_DSS_CORE_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) #define DRA7_BB2D_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) -/* gpu clocks */ -#define DRA7_GPU_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) - /* l3init clocks */ #define DRA7_MMC1_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) #define DRA7_MMC2_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) @@ -267,10 +260,17 @@ #define DRA7_L3INSTR_L3_MAIN_2_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) #define DRA7_L3INSTR_L3_INSTR_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) +/* iva clocks */ +#define DRA7_IVA_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) +#define DRA7_SL2IF_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) + /* dss clocks */ #define DRA7_DSS_DSS_CORE_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) #define DRA7_DSS_BB2D_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) +/* gpu clocks */ +#define DRA7_GPU_CLKCTRL DRA7_CLKCTRL_INDEX(0x20) + /* l3init clocks */ #define DRA7_L3INIT_MMC1_CLKCTRL DRA7_CLKCTRL_INDEX(0x28) #define DRA7_L3INIT_MMC2_CLKCTRL DRA7_CLKCTRL_INDEX(0x30) diff --git a/include/linux/aio.h b/include/linux/aio.h index b83e68dd006f1ea8aa4f740163c903bc05db2d1c..86892a4fe7c8b6d59cb55a72a7a6e26455835cbf 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -20,8 +20,4 @@ static inline void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) { } #endif /* CONFIG_AIO */ -/* for sysctl: */ -extern unsigned long aio_nr; -extern unsigned long aio_max_nr; - #endif /* __LINUX__AIO_H */ diff --git a/include/linux/ata.h b/include/linux/ata.h index 199e47e97d6459f1d96b4395448fdd7153182b79..21292b5bbb5509787a2f78d551e73b41aa950966 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -324,12 +324,12 @@ enum { 
ATA_LOG_NCQ_NON_DATA = 0x12, ATA_LOG_NCQ_SEND_RECV = 0x13, ATA_LOG_IDENTIFY_DEVICE = 0x30, + ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47, /* Identify device log pages: */ ATA_LOG_SECURITY = 0x06, ATA_LOG_SATA_SETTINGS = 0x08, ATA_LOG_ZONED_INFORMATION = 0x09, - ATA_LOG_CONCURRENT_POSITIONING_RANGES = 0x47, /* Identify device SATA settings log:*/ ATA_LOG_DEVSLP_OFFSET = 0x30, diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index a241dcf50f39f76ea1beb9325758bece9760111f..7dba0847510ca56af8b50f3956db0634045ec4dd 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -54,12 +55,6 @@ struct device; * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area * bitmap_find_next_zero_area_off(buf, len, pos, n, mask, mask_off) as above - * bitmap_next_clear_region(map, &start, &end, nbits) Find next clear region - * bitmap_next_set_region(map, &start, &end, nbits) Find next set region - * bitmap_for_each_clear_region(map, rs, re, start, end) - * Iterate over all clear regions - * bitmap_for_each_set_region(map, rs, re, start, end) - * Iterate over all set regions * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n * bitmap_cut(dst, src, first, n, nbits) Cut n bits from first, copy rest @@ -466,14 +461,6 @@ static inline void bitmap_replace(unsigned long *dst, __bitmap_replace(dst, old, new, mask, nbits); } -static inline void bitmap_next_clear_region(unsigned long *bitmap, - unsigned int *rs, unsigned int *re, - unsigned int end) -{ - *rs = find_next_zero_bit(bitmap, end, *rs); - *re = find_next_bit(bitmap, end, *rs + 1); -} - static inline void bitmap_next_set_region(unsigned long *bitmap, unsigned int *rs, unsigned int *re, unsigned int end) @@ -482,25 +469,6 @@ static inline void bitmap_next_set_region(unsigned long *bitmap, *re = find_next_zero_bit(bitmap, end, *rs + 1); } -/* - * Bitmap region iterators. Iterates over the bitmap between [@start, @end). - * @rs and @re should be integer variables and will be set to start and end - * index of the current clear or set region. - */ -#define bitmap_for_each_clear_region(bitmap, rs, re, start, end) \ - for ((rs) = (start), \ - bitmap_next_clear_region((bitmap), &(rs), &(re), (end)); \ - (rs) < (re); \ - (rs) = (re) + 1, \ - bitmap_next_clear_region((bitmap), &(rs), &(re), (end))) - -#define bitmap_for_each_set_region(bitmap, rs, re, start, end) \ - for ((rs) = (start), \ - bitmap_next_set_region((bitmap), &(rs), &(re), (end)); \ - (rs) < (re); \ - (rs) = (re) + 1, \ - bitmap_next_set_region((bitmap), &(rs), &(re), (end))) - /** * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap. 
* @n: u64 value diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 5e62e2383b7f19eba3f83a0b1435b77c72cff0fa..7aaed501f76866c4f34069c4769fff7696ef5235 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -32,40 +32,6 @@ extern unsigned long __sw_hweight64(__u64 w); */ #include -#define for_each_set_bit(bit, addr, size) \ - for ((bit) = find_first_bit((addr), (size)); \ - (bit) < (size); \ - (bit) = find_next_bit((addr), (size), (bit) + 1)) - -/* same as for_each_set_bit() but use bit as value to start with */ -#define for_each_set_bit_from(bit, addr, size) \ - for ((bit) = find_next_bit((addr), (size), (bit)); \ - (bit) < (size); \ - (bit) = find_next_bit((addr), (size), (bit) + 1)) - -#define for_each_clear_bit(bit, addr, size) \ - for ((bit) = find_first_zero_bit((addr), (size)); \ - (bit) < (size); \ - (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) - -/* same as for_each_clear_bit() but use bit as value to start with */ -#define for_each_clear_bit_from(bit, addr, size) \ - for ((bit) = find_next_zero_bit((addr), (size), (bit)); \ - (bit) < (size); \ - (bit) = find_next_zero_bit((addr), (size), (bit) + 1)) - -/** - * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits - * @start: bit offset to start search and to store the current iteration offset - * @clump: location to store copy of current 8-bit clump - * @bits: bitmap address to base the search on - * @size: bitmap size in number of bits - */ -#define for_each_set_clump8(start, clump, bits, size) \ - for ((start) = find_first_clump8(&(clump), (bits), (size)); \ - (start) < (size); \ - (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8)) - static inline int get_bitmask_order(unsigned int count) { int order; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9c95df26fc26b1fec99c5325f8f22ceeef59d175..f35aea98bc3510b6e2fd32fa9ece687c567ba15d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1258,6 +1258,7 @@ unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors, void disk_end_io_acct(struct gendisk *disk, unsigned int op, unsigned long start_time); +void bio_start_io_acct_time(struct bio *bio, unsigned long start_time); unsigned long bio_start_io_acct(struct bio *bio); void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, struct block_device *orig_bdev); diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 6a89ea410e439c7c10c1ef33998ef02b3667fcf1..edf62eaa6285275a3bbd20824b9af1cbf7a4620e 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -35,6 +35,7 @@ #define CEPH_OPT_TCP_NODELAY (1<<4) /* TCP_NODELAY on TCP sockets */ #define CEPH_OPT_NOMSGSIGN (1<<5) /* don't sign msgs (msgr1) */ #define CEPH_OPT_ABORT_ON_FULL (1<<6) /* abort w/ ENOSPC when full */ +#define CEPH_OPT_RXBOUNCE (1<<7) /* double-buffer read data */ #define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index ff99ce094cfaed19ad07d7d55f0ccedea8c88ea1..e7f2fb2fc2079717142683260332b9c77c3ddcae 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -383,6 +383,10 @@ struct ceph_connection_v2_info { struct ceph_gcm_nonce in_gcm_nonce; struct ceph_gcm_nonce out_gcm_nonce; + struct page **in_enc_pages; + int in_enc_page_cnt; + int in_enc_resid; + int in_enc_i; struct page **out_enc_pages; int out_enc_page_cnt; int out_enc_resid; @@ -457,6 +461,7 @@ struct ceph_connection { 
struct ceph_msg *out_msg; /* sending message (== tail of out_sent) */ + struct page *bounce_page; u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */ struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */ diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h deleted file mode 100644 index 5f5730c1d324ffe39121869405baa29285d8631a..0000000000000000000000000000000000000000 --- a/include/linux/cleancache.h +++ /dev/null @@ -1,124 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_CLEANCACHE_H -#define _LINUX_CLEANCACHE_H - -#include -#include -#include - -#define CLEANCACHE_NO_POOL -1 -#define CLEANCACHE_NO_BACKEND -2 -#define CLEANCACHE_NO_BACKEND_SHARED -3 - -#define CLEANCACHE_KEY_MAX 6 - -/* - * cleancache requires every file with a page in cleancache to have a - * unique key unless/until the file is removed/truncated. For some - * filesystems, the inode number is unique, but for "modern" filesystems - * an exportable filehandle is required (see exportfs.h) - */ -struct cleancache_filekey { - union { - ino_t ino; - __u32 fh[CLEANCACHE_KEY_MAX]; - u32 key[CLEANCACHE_KEY_MAX]; - } u; -}; - -struct cleancache_ops { - int (*init_fs)(size_t); - int (*init_shared_fs)(uuid_t *uuid, size_t); - int (*get_page)(int, struct cleancache_filekey, - pgoff_t, struct page *); - void (*put_page)(int, struct cleancache_filekey, - pgoff_t, struct page *); - void (*invalidate_page)(int, struct cleancache_filekey, pgoff_t); - void (*invalidate_inode)(int, struct cleancache_filekey); - void (*invalidate_fs)(int); -}; - -extern int cleancache_register_ops(const struct cleancache_ops *ops); -extern void __cleancache_init_fs(struct super_block *); -extern void __cleancache_init_shared_fs(struct super_block *); -extern int __cleancache_get_page(struct page *); -extern void __cleancache_put_page(struct page *); -extern void __cleancache_invalidate_page(struct address_space *, struct page *); -extern void __cleancache_invalidate_inode(struct address_space *); -extern void __cleancache_invalidate_fs(struct super_block *); - -#ifdef CONFIG_CLEANCACHE -#define cleancache_enabled (1) -static inline bool cleancache_fs_enabled_mapping(struct address_space *mapping) -{ - return mapping->host->i_sb->cleancache_poolid >= 0; -} -static inline bool cleancache_fs_enabled(struct page *page) -{ - return cleancache_fs_enabled_mapping(page->mapping); -} -#else -#define cleancache_enabled (0) -#define cleancache_fs_enabled(_page) (0) -#define cleancache_fs_enabled_mapping(_page) (0) -#endif - -/* - * The shim layer provided by these inline functions allows the compiler - * to reduce all cleancache hooks to nothingness if CONFIG_CLEANCACHE - * is disabled, to a single global variable check if CONFIG_CLEANCACHE - * is enabled but no cleancache "backend" has dynamically enabled it, - * and, for the most frequent cleancache ops, to a single global variable - * check plus a superblock element comparison if CONFIG_CLEANCACHE is enabled - * and a cleancache backend has dynamically enabled cleancache, but the - * filesystem referenced by that cleancache op has not enabled cleancache. - * As a result, CONFIG_CLEANCACHE can be enabled by default with essentially - * no measurable performance impact. 
- */ - -static inline void cleancache_init_fs(struct super_block *sb) -{ - if (cleancache_enabled) - __cleancache_init_fs(sb); -} - -static inline void cleancache_init_shared_fs(struct super_block *sb) -{ - if (cleancache_enabled) - __cleancache_init_shared_fs(sb); -} - -static inline int cleancache_get_page(struct page *page) -{ - if (cleancache_enabled && cleancache_fs_enabled(page)) - return __cleancache_get_page(page); - return -1; -} - -static inline void cleancache_put_page(struct page *page) -{ - if (cleancache_enabled && cleancache_fs_enabled(page)) - __cleancache_put_page(page); -} - -static inline void cleancache_invalidate_page(struct address_space *mapping, - struct page *page) -{ - /* careful... page->mapping is NULL sometimes when this is called */ - if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) - __cleancache_invalidate_page(mapping, page); -} - -static inline void cleancache_invalidate_inode(struct address_space *mapping) -{ - if (cleancache_enabled && cleancache_fs_enabled_mapping(mapping)) - __cleancache_invalidate_inode(mapping); -} - -static inline void cleancache_invalidate_fs(struct super_block *sb) -{ - if (cleancache_enabled) - __cleancache_invalidate_fs(sb); -} - -#endif /* _LINUX_CLEANCACHE_H */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 429dcebe2b9922a3153bc77d257d03b544926c80..0f7fd205ab7eacc973989bf17b6ea02026fbf934 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -117,14 +117,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, */ #define __stringify_label(n) #n -#define __annotate_reachable(c) ({ \ - asm volatile(__stringify_label(c) ":\n\t" \ - ".pushsection .discard.reachable\n\t" \ - ".long " __stringify_label(c) "b - .\n\t" \ - ".popsection\n\t" : : "i" (c)); \ -}) -#define annotate_reachable() __annotate_reachable(__COUNTER__) - #define __annotate_unreachable(c) ({ \ asm volatile(__stringify_label(c) ":\n\t" \ ".pushsection .discard.unreachable\n\t" \ @@ -133,24 +125,21 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, }) #define annotate_unreachable() __annotate_unreachable(__COUNTER__) -#define ASM_UNREACHABLE \ - "999:\n\t" \ - ".pushsection .discard.unreachable\n\t" \ - ".long 999b - .\n\t" \ +#define ASM_REACHABLE \ + "998:\n\t" \ + ".pushsection .discard.reachable\n\t" \ + ".long 998b - .\n\t" \ ".popsection\n\t" /* Annotate a C jump table to allow objtool to follow the code flow */ #define __annotate_jump_table __section(".rodata..c_jump_table") #else -#define annotate_reachable() #define annotate_unreachable() +# define ASM_REACHABLE #define __annotate_jump_table #endif -#ifndef ASM_UNREACHABLE -# define ASM_UNREACHABLE -#endif #ifndef unreachable # define unreachable() do { \ annotate_unreachable(); \ diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 78fcd776b185a853653e2bbd499e7d2c28675439..248a68c668b4590685ca02d699310ccb7d3373ce 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -14,10 +14,6 @@ struct core_vma_metadata { unsigned long dump_size; }; -extern int core_uses_pid; -extern char core_pattern[]; -extern unsigned int core_pipe_limit; - /* * These are the only things you should do on a core-file: use only these * functions to write out all the necessary info. 
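/*
 * Context for the sysctl extern removals above: this series stops routing
 * knobs such as core_uses_pid, core_pattern and core_pipe_limit through
 * kernel/sysctl.c. A sketch of the replacement pattern as used by
 * fs/coredump.c (table contents abridged; treat as illustrative, not as
 * part of this diff):
 */
#ifdef CONFIG_SYSCTL
static struct ctl_table coredump_sysctls[] = {
	{
		.procname	= "core_uses_pid",
		.data		= &core_uses_pid,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	/* core_pattern and core_pipe_limit entries elided */
	{ }
};

static int __init init_fs_coredump_sysctls(void)
{
	register_sysctl_init("kernel", coredump_sysctls);
	return 0;
}
fs_initcall(init_fs_coredump_sysctls);
#endif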
@@ -37,4 +33,10 @@ extern void do_coredump(const kernel_siginfo_t *siginfo); static inline void do_coredump(const kernel_siginfo_t *siginfo) {} #endif +#if defined(CONFIG_COREDUMP) && defined(CONFIG_SYSCTL) +extern void validate_coredump_safety(void); +#else +static inline void validate_coredump_safety(void) {} +#endif + #endif /* _LINUX_COREDUMP_H */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 1e7399fc69c0a1e5304b892bb8accc15567cdfd3..64dae70d31f53847c1749233175d062d7333b3b8 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -123,6 +123,17 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return 0; } +static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) +{ + return 0; +} + +static inline unsigned int cpumask_first_and(const struct cpumask *srcp1, + const struct cpumask *srcp2) +{ + return 0; +} + static inline unsigned int cpumask_last(const struct cpumask *srcp) { return 0; @@ -167,7 +178,7 @@ static inline unsigned int cpumask_local_spread(unsigned int i, int node) static inline int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p) { - return cpumask_next_and(-1, src1p, src2p); + return cpumask_first_and(src1p, src2p); } static inline int cpumask_any_distribute(const struct cpumask *srcp) @@ -195,6 +206,30 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); } +/** + * cpumask_first_zero - get the first unset cpu in a cpumask + * @srcp: the cpumask pointer + * + * Returns >= nr_cpu_ids if all cpus are set. + */ +static inline unsigned int cpumask_first_zero(const struct cpumask *srcp) +{ + return find_first_zero_bit(cpumask_bits(srcp), nr_cpumask_bits); +} + +/** + * cpumask_first_and - return the first cpu from *srcp1 & *srcp2 + * @src1p: the first input + * @src2p: the second input + * + * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). + */ +static inline +unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask *srcp2) +{ + return find_first_and_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), nr_cpumask_bits); +} + /** * cpumask_last - get the last CPU in a cpumask * @srcp: - the cpumask pointer @@ -585,15 +620,6 @@ static inline void cpumask_copy(struct cpumask *dstp, */ #define cpumask_any(srcp) cpumask_first(srcp) -/** - * cpumask_first_and - return the first cpu from *srcp1 & *srcp2 - * @src1p: the first input - * @src2p: the second input - * - * Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and(). 
- */ -#define cpumask_first_and(src1p, src2p) cpumask_next_and(-1, (src1p), (src2p)) - /** * cpumask_any_and - pick a "random" cpu from *mask1 & *mask2 * @mask1: the first input cpumask diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 9e23d33bb6f1c853d7c22aa03d6bcd9b3c823cd2..f5bba51480b2f720b45fda36567e0dc8b30b0f24 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -61,16 +61,6 @@ extern const struct qstr empty_name; extern const struct qstr slash_name; extern const struct qstr dotdot_name; -struct dentry_stat_t { - long nr_dentry; - long nr_unused; - long age_limit; /* age in seconds */ - long want_pages; /* pages requested by system */ - long nr_negative; /* # of unused negative dentries */ - long dummy; /* Reserved for future use */ -}; -extern struct dentry_stat_t dentry_stat; - /* * Try to keep struct dentry aligned on 64 byte cachelines (this will * give reasonable cacheline footprint with larger lines without the diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h index b87c3b85a166c4a6d31060ab7960565502d33f4d..b1d26f9f1c9fe5d6e986defae967bda1fe293431 100644 --- a/include/linux/dnotify.h +++ b/include/linux/dnotify.h @@ -29,7 +29,6 @@ struct dnotify_struct { FS_CREATE | FS_RENAME |\ FS_MOVED_FROM | FS_MOVED_TO) -extern int dir_notify_enable; extern void dnotify_flush(struct file *, fl_owner_t); extern int fcntl_dirnotify(int, struct file *, unsigned long); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index a26f37a2716762164c2e936460c2106a37e3c829..11efc45de66a9e60ff7b2d10b49ea9f22397895a 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -111,7 +111,7 @@ struct ethtool_link_ext_state_info { enum ethtool_link_ext_substate_bad_signal_integrity bad_signal_integrity; enum ethtool_link_ext_substate_cable_issue cable_issue; enum ethtool_link_ext_substate_module module; - u8 __link_ext_substate; + u32 __link_ext_substate; }; }; diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 3afdf339d53c937c3e864b2448731803f90a51f2..419cadcd7ff55d1a2a108ca00ee5a2c326aa740c 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -5,8 +5,6 @@ #include #include -extern struct ctl_table fanotify_table[]; /* for sysctl */ - #define FAN_GROUP_FLAG(group, flag) \ ((group)->fanotify_data.flags & (flag)) diff --git a/include/linux/fb.h b/include/linux/fb.h index 3da95842b2075adc8f658025120d43e9cbb6ed9e..02f362c661c8003928a928bd1a92bc6db2b0bd2b 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -262,7 +262,7 @@ struct fb_ops { /* Draws a rectangle */ void (*fb_fillrect) (struct fb_info *info, const struct fb_fillrect *rect); - /* Copy data from area to another. Obsolete. 
*/
+	/* Copy data from area to another */
 	void (*fb_copyarea) (struct fb_info *info, const struct fb_copyarea *region);
 	/* Draws a image to the display */
 	void (*fb_imageblit) (struct fb_info *info, const struct fb_image *image);
diff --git a/include/linux/find.h b/include/linux/find.h
new file mode 100644
index 0000000000000000000000000000000000000000..5bb6db213bcb84a38be466136b9e8eb5c6932fc3
--- /dev/null
+++ b/include/linux/find.h
@@ -0,0 +1,372 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_FIND_H_
+#define __LINUX_FIND_H_
+
+#ifndef __LINUX_BITMAP_H
+#error only <linux/bitmap.h> can be included directly
+#endif
+
+#include <linux/bitops.h>
+
+extern unsigned long _find_next_bit(const unsigned long *addr1,
+		const unsigned long *addr2, unsigned long nbits,
+		unsigned long start, unsigned long invert, unsigned long le);
+extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_first_and_bit(const unsigned long *addr1,
+					 const unsigned long *addr2, unsigned long size);
+extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size);
+
+#ifndef find_next_bit
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+			    unsigned long offset)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = *addr & GENMASK(size - 1, offset);
+		return val ? __ffs(val) : size;
+	}
+
+	return _find_next_bit(addr, NULL, size, offset, 0UL, 0);
+}
+#endif
+
+#ifndef find_next_and_bit
+/**
+ * find_next_and_bit - find the next set bit in both memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+static inline
+unsigned long find_next_and_bit(const unsigned long *addr1,
+		const unsigned long *addr2, unsigned long size,
+		unsigned long offset)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = *addr1 & *addr2 & GENMASK(size - 1, offset);
+		return val ? __ffs(val) : size;
+	}
+
+	return _find_next_bit(addr1, addr2, size, offset, 0UL, 0);
+}
+#endif
+
+#ifndef find_next_zero_bit
+/**
+ * find_next_zero_bit - find the next cleared bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number of the next zero bit
+ * If no bits are zero, returns @size.
+ */
+static inline
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+				 unsigned long offset)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = *addr | ~GENMASK(size - 1, offset);
+		return val == ~0UL ?
size : ffz(val); + } + + return _find_next_bit(addr, NULL, size, offset, ~0UL, 0); +} +#endif + +#ifndef find_first_bit +/** + * find_first_bit - find the first set bit in a memory region + * @addr: The address to start the search at + * @size: The maximum number of bits to search + * + * Returns the bit number of the first set bit. + * If no bits are set, returns @size. + */ +static inline +unsigned long find_first_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr & GENMASK(size - 1, 0); + + return val ? __ffs(val) : size; + } + + return _find_first_bit(addr, size); +} +#endif + +#ifndef find_first_and_bit +/** + * find_first_and_bit - find the first set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0); + + return val ? __ffs(val) : size; + } + + return _find_first_and_bit(addr1, addr2, size); +} +#endif + +#ifndef find_first_zero_bit +/** + * find_first_zero_bit - find the first cleared bit in a memory region + * @addr: The address to start the search at + * @size: The maximum number of bits to search + * + * Returns the bit number of the first cleared bit. + * If no bits are zero, returns @size. + */ +static inline +unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr | ~GENMASK(size - 1, 0); + + return val == ~0UL ? size : ffz(val); + } + + return _find_first_zero_bit(addr, size); +} +#endif + +#ifndef find_last_bit +/** + * find_last_bit - find the last set bit in a memory region + * @addr: The address to start the search at + * @size: The number of bits to search + * + * Returns the bit number of the last set bit, or size. + */ +static inline +unsigned long find_last_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr & GENMASK(size - 1, 0); + + return val ? __fls(val) : size; + } + + return _find_last_bit(addr, size); +} +#endif + +/** + * find_next_clump8 - find next 8-bit clump with set bits in a memory region + * @clump: location to store copy of found clump + * @addr: address to base the search on + * @size: bitmap size in number of bits + * @offset: bit offset at which to start searching + * + * Returns the bit offset for the next set clump; the found clump value is + * copied to the location pointed by @clump. If no bits are set, returns @size. 
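+ *
+ * Usage sketch (illustrative only; "bits" and "nbits" are assumed caller
+ * state, e.g. a GPIO driver scanning 8-line ports):
+ *
+ *	unsigned long clump;
+ *	unsigned long offset;
+ *
+ *	offset = find_next_clump8(&clump, bits, nbits, 0);
+ *	if (offset < nbits)
+ *		... clump now holds the 8-bit group found at offset ...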
+ */
+extern unsigned long find_next_clump8(unsigned long *clump,
+				      const unsigned long *addr,
+				      unsigned long size, unsigned long offset);
+
+#define find_first_clump8(clump, bits, size) \
+	find_next_clump8((clump), (bits), (size), 0)
+
+#if defined(__LITTLE_ENDIAN)
+
+static inline unsigned long find_next_zero_bit_le(const void *addr,
+		unsigned long size, unsigned long offset)
+{
+	return find_next_zero_bit(addr, size, offset);
+}
+
+static inline unsigned long find_next_bit_le(const void *addr,
+		unsigned long size, unsigned long offset)
+{
+	return find_next_bit(addr, size, offset);
+}
+
+static inline unsigned long find_first_zero_bit_le(const void *addr,
+		unsigned long size)
+{
+	return find_first_zero_bit(addr, size);
+}
+
+#elif defined(__BIG_ENDIAN)
+
+#ifndef find_next_zero_bit_le
+static inline
+unsigned long find_next_zero_bit_le(const void *addr,
+		unsigned long size, unsigned long offset)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val = *(const unsigned long *)addr;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = swab(val) | ~GENMASK(size - 1, offset);
+		return val == ~0UL ? size : ffz(val);
+	}
+
+	return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
+}
+#endif
+
+#ifndef find_next_bit_le
+static inline
+unsigned long find_next_bit_le(const void *addr,
+		unsigned long size, unsigned long offset)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val = *(const unsigned long *)addr;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = swab(val) & GENMASK(size - 1, offset);
+		return val ? __ffs(val) : size;
+	}
+
+	return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
+}
+#endif
+
+#ifndef find_first_zero_bit_le
+#define find_first_zero_bit_le(addr, size) \
+	find_next_zero_bit_le((addr), (size), 0)
+#endif
+
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+
+#define for_each_set_bit(bit, addr, size) \
+	for ((bit) = find_next_bit((addr), (size), 0);		\
+	     (bit) < (size);					\
+	     (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_from(bit, addr, size) \
+	for ((bit) = find_next_bit((addr), (size), (bit));	\
+	     (bit) < (size);					\
+	     (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+#define for_each_clear_bit(bit, addr, size) \
+	for ((bit) = find_next_zero_bit((addr), (size), 0);	\
+	     (bit) < (size);					\
+	     (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_clear_bit() but use bit as value to start with */
+#define for_each_clear_bit_from(bit, addr, size) \
+	for ((bit) = find_next_zero_bit((addr), (size), (bit));	\
+	     (bit) < (size);					\
+	     (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
+
+/**
+ * for_each_set_bitrange - iterate over all set bit ranges [b; e)
+ * @b: bit offset of start of current bitrange (first set bit)
+ * @e: bit offset of end of current bitrange (first unset bit)
+ * @addr: bitmap address to base the search on
+ * @size: bitmap size in number of bits
+ */
+#define for_each_set_bitrange(b, e, addr, size)			\
+	for ((b) = find_next_bit((addr), (size), 0),		\
+	     (e) = find_next_zero_bit((addr), (size), (b) + 1);	\
+	     (b) < (size);					\
+	     (b) = find_next_bit((addr), (size), (e) + 1),	\
+	     (e) = find_next_zero_bit((addr), (size), (b) + 1))
+
+/**
+ * for_each_set_bitrange_from - iterate over all set bit ranges [b; e)
+ * @b: bit offset of start of current bitrange (first set bit); must be initialized
+ * @e: bit offset of end of current bitrange (first unset bit)
+ * @addr: bitmap address to base the search on
+ * @size: bitmap
size in number of bits + */ +#define for_each_set_bitrange_from(b, e, addr, size) \ + for ((b) = find_next_bit((addr), (size), (b)), \ + (e) = find_next_zero_bit((addr), (size), (b) + 1); \ + (b) < (size); \ + (b) = find_next_bit((addr), (size), (e) + 1), \ + (e) = find_next_zero_bit((addr), (size), (b) + 1)) + +/** + * for_each_clear_bitrange - iterate over all unset bit ranges [b; e) + * @b: bit offset of start of current bitrange (first unset bit) + * @e: bit offset of end of current bitrange (first set bit) + * @addr: bitmap address to base the search on + * @size: bitmap size in number of bits + */ +#define for_each_clear_bitrange(b, e, addr, size) \ + for ((b) = find_next_zero_bit((addr), (size), 0), \ + (e) = find_next_bit((addr), (size), (b) + 1); \ + (b) < (size); \ + (b) = find_next_zero_bit((addr), (size), (e) + 1), \ + (e) = find_next_bit((addr), (size), (b) + 1)) + +/** + * for_each_clear_bitrange_from - iterate over all unset bit ranges [b; e) + * @b: bit offset of start of current bitrange (first set bit); must be initialized + * @e: bit offset of end of current bitrange (first unset bit) + * @addr: bitmap address to base the search on + * @size: bitmap size in number of bits + */ +#define for_each_clear_bitrange_from(b, e, addr, size) \ + for ((b) = find_next_zero_bit((addr), (size), (b)), \ + (e) = find_next_bit((addr), (size), (b) + 1); \ + (b) < (size); \ + (b) = find_next_zero_bit((addr), (size), (e) + 1), \ + (e) = find_next_bit((addr), (size), (b) + 1)) + +/** + * for_each_set_clump8 - iterate over bitmap for each 8-bit clump with set bits + * @start: bit offset to start search and to store the current iteration offset + * @clump: location to store copy of current 8-bit clump + * @bits: bitmap address to base the search on + * @size: bitmap size in number of bits + */ +#define for_each_set_clump8(start, clump, bits, size) \ + for ((start) = find_first_clump8(&(clump), (bits), (size)); \ + (start) < (size); \ + (start) = find_next_clump8(&(clump), (bits), (size), (start) + 8)) + +#endif /*__LINUX_FIND_H_ */ diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index b07d88c92bb2932bbcfda047530337025d216f2e..a631bac12220c5d9847175de1afb69551b33f334 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -7,31 +7,17 @@ #include #include -/* - * Return code to denote that requested number of - * frontswap pages are unused(moved to page cache). - * Used in shmem_unuse and try_to_unuse. 
- */ -#define FRONTSWAP_PAGES_UNUSED 2 - struct frontswap_ops { void (*init)(unsigned); /* this swap type was just swapon'ed */ int (*store)(unsigned, pgoff_t, struct page *); /* store a page */ int (*load)(unsigned, pgoff_t, struct page *); /* load a page */ void (*invalidate_page)(unsigned, pgoff_t); /* page no longer needed */ void (*invalidate_area)(unsigned); /* swap type just swapoff'ed */ - struct frontswap_ops *next; /* private pointer to next ops */ }; -extern void frontswap_register_ops(struct frontswap_ops *ops); -extern void frontswap_shrink(unsigned long); -extern unsigned long frontswap_curr_pages(void); -extern void frontswap_writethrough(bool); -#define FRONTSWAP_HAS_EXCLUSIVE_GETS -extern void frontswap_tmem_exclusive_gets(bool); +int frontswap_register_ops(const struct frontswap_ops *ops); -extern bool __frontswap_test(struct swap_info_struct *, pgoff_t); -extern void __frontswap_init(unsigned type, unsigned long *map); +extern void frontswap_init(unsigned type, unsigned long *map); extern int __frontswap_store(struct page *page); extern int __frontswap_load(struct page *page); extern void __frontswap_invalidate_page(unsigned, pgoff_t); @@ -45,11 +31,6 @@ static inline bool frontswap_enabled(void) return static_branch_unlikely(&frontswap_enabled_key); } -static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) -{ - return __frontswap_test(sis, offset); -} - static inline void frontswap_map_set(struct swap_info_struct *p, unsigned long *map) { @@ -68,11 +49,6 @@ static inline bool frontswap_enabled(void) return false; } -static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) -{ - return false; -} - static inline void frontswap_map_set(struct swap_info_struct *p, unsigned long *map) { @@ -112,11 +88,4 @@ static inline void frontswap_invalidate_area(unsigned type) __frontswap_invalidate_area(type); } -static inline void frontswap_init(unsigned type, unsigned long *map) -{ -#ifdef CONFIG_FRONTSWAP - __frontswap_init(type, map); -#endif -} - #endif /* _LINUX_FRONTSWAP_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index c8510da6cc6dc88821b1265d9bb00357579a3dd1..e2d892b201b07a5046497f8e6fe3ff04c775df89 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -79,15 +79,8 @@ extern void __init inode_init_early(void); extern void __init files_init(void); extern void __init files_maxfiles_init(void); -extern struct files_stat_struct files_stat; extern unsigned long get_max_files(void); extern unsigned int sysctl_nr_open; -extern struct inodes_stat_t inodes_stat; -extern int leases_enable, lease_break_time; -extern int sysctl_protected_symlinks; -extern int sysctl_protected_hardlinks; -extern int sysctl_protected_fifos; -extern int sysctl_protected_regular; typedef __kernel_rwf_t rwf_t; @@ -1490,7 +1483,7 @@ struct super_block { #ifdef CONFIG_FS_VERITY const struct fsverity_operations *s_vop; #endif -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) struct unicode_map *s_encoding; __u16 s_encoding_flags; #endif @@ -1542,11 +1535,6 @@ struct super_block { const struct dentry_operations *s_d_op; /* default d_op for dentries */ - /* - * Saved pool identifier for cleancache (-1 means none) - */ - int cleancache_poolid; - struct shrinker s_shrink; /* per-sb shrinker handle */ /* Number of inodes with nlink == 0 but still referenced */ @@ -3533,12 +3521,6 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); struct ctl_table; -int proc_nr_files(struct ctl_table *table, int 
write, - void *buffer, size_t *lenp, loff_t *ppos); -int proc_nr_dentry(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -int proc_nr_inodes(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); int __init list_bdev_fs_names(char *buf, size_t size); #define __FMODE_EXEC ((__force int) FMODE_EXEC) diff --git a/include/linux/fscache.h b/include/linux/fscache.h index ede50406bcb0ed9a68c986a614575b993fc03953..296c5f1d9f35df98a922ed673380cd9a85153069 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -665,6 +665,11 @@ static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie, static inline void fscache_note_page_release(struct fscache_cookie *cookie) { + /* If we've written data to the cache (HAVE_DATA) and there wasn't any + * data in the cache when we started (NO_DATA_TO_READ), it may no + * longer be true that we can skip reading from the cache - so clear + * the flag that causes reads to be skipped. + */ if (cookie && test_bit(FSCACHE_COOKIE_HAVE_DATA, &cookie->flags) && test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 3a2d7dc3c6071a424d81305486a146f155c875b1..bb8467cd11ae28eb795db5eb17d4b78774da61ca 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -224,6 +224,43 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, dir, &new_dentry->d_name, 0); } +/* + * fsnotify_delete - @dentry was unlinked and unhashed + * + * Caller must make sure that dentry->d_name is stable. + * + * Note: unlike fsnotify_unlink(), we have to pass also the unlinked inode + * as this may be called after d_delete() and old_dentry may be negative. + */ +static inline void fsnotify_delete(struct inode *dir, struct inode *inode, + struct dentry *dentry) +{ + __u32 mask = FS_DELETE; + + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + + fsnotify_name(mask, inode, FSNOTIFY_EVENT_INODE, dir, &dentry->d_name, + 0); +} + +/** + * d_delete_notify - delete a dentry and call fsnotify_delete() + * @dentry: The dentry to delete + * + * This helper is used to guaranty that the unlinked inode cannot be found + * by lookup of this name after fsnotify_delete() event has been delivered. 
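+ *
+ * Sketch of the intended call-site shape (illustrative, not a line from
+ * this diff): a caller that used to call d_delete() and then emit the
+ * delete event would now do:
+ *
+ *	error = dir->i_op->unlink(dir, dentry);
+ *	if (!error)
+ *		d_delete_notify(dir, dentry);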
+ */ +static inline void d_delete_notify(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = d_inode(dentry); + + ihold(inode); + d_delete(dentry); + fsnotify_delete(dir, inode, dentry); + iput(inode); +} + /* * fsnotify_unlink - 'name' was unlinked * @@ -231,10 +268,10 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, */ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) { - /* Expected to be called before d_delete() */ - WARN_ON_ONCE(d_is_negative(dentry)); + if (WARN_ON_ONCE(d_is_negative(dentry))) + return; - fsnotify_dirent(dir, dentry, FS_DELETE); + fsnotify_delete(dir, d_inode(dentry), dentry); } /* @@ -258,10 +295,10 @@ static inline void fsnotify_mkdir(struct inode *dir, struct dentry *dentry) */ static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) { - /* Expected to be called before d_delete() */ - WARN_ON_ONCE(d_is_negative(dentry)); + if (WARN_ON_ONCE(d_is_negative(dentry))) + return; - fsnotify_dirent(dir, dentry, FS_DELETE | FS_ISDIR); + fsnotify_delete(dir, d_inode(dentry), dentry); } /* diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index f565a89388364359acf1209be73d3ae0a5b05e62..fe2e0179ed51e3a3b6271dd04e200c0c25129193 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1262,6 +1262,7 @@ struct hv_device { struct vmbus_channel *channel; struct kset *channels_kset; struct device_dma_parameters dma_parms; + u64 dma_mask; /* place holder to keep track of the dir for hv device in debugfs */ struct dentry *debug_dir; diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 8420fe5049272bbfa108df794bb351f7d87f7a5c..2be4dd7e90a943785fe51d14ffd2d1ff9e0de8ee 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -46,8 +46,10 @@ struct vlan_hdr { * @h_vlan_encapsulated_proto: packet type ID or len */ struct vlan_ethhdr { - unsigned char h_dest[ETH_ALEN]; - unsigned char h_source[ETH_ALEN]; + struct_group(addrs, + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + ); __be16 h_vlan_proto; __be16 h_vlan_TCI; __be16 h_vlan_encapsulated_proto; diff --git a/include/linux/inotify.h b/include/linux/inotify.h index 6a24905f6e1e2964ecea4487e3140913d4bccc57..8d20caa1b2687b76ac3399ae1006f90d3026dbe3 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h @@ -7,11 +7,8 @@ #ifndef _LINUX_INOTIFY_H #define _LINUX_INOTIFY_H -#include #include -extern struct ctl_table inotify_table[]; /* for sysctl */ - #define ALL_INOTIFY_BITS (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \ IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | \ IN_MOVED_TO | IN_CREATE | IN_DELETE | \ diff --git a/include/linux/iomap.h b/include/linux/iomap.h index b55bd49e55f51e6e6084fc2710330994e6d20213..97a3a2edb58505bc37fe587aa46f456c16c3e46b 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -263,9 +263,11 @@ struct iomap_ioend { struct list_head io_list; /* next ioend in chain */ u16 io_type; u16 io_flags; /* IOMAP_F_* */ + u32 io_folios; /* folios added to ioend */ struct inode *io_inode; /* file being written to */ size_t io_size; /* size of the extent */ loff_t io_offset; /* offset in the file */ + sector_t io_sector; /* start sector of ioend */ struct bio *io_bio; /* bio being built */ struct bio io_inline_bio; /* MUST BE LAST! 
*/ }; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index fd933c45281af84c8391e3845021c70518c6b164..9c3ada74ffb18fccc862bc4196a178425de67cf8 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -594,7 +594,7 @@ struct transaction_s */ unsigned long t_log_start; - /* + /* * Number of buffers on the t_buffers list [j_list_lock, no locks * needed for jbd2 thread] */ @@ -1295,7 +1295,7 @@ struct journal_s * Clean-up after fast commit or full commit. JBD2 calls this function * after every commit operation. */ - void (*j_fc_cleanup_callback)(struct journal_s *journal, int); + void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid); /** * @j_fc_replay_callback: @@ -1419,9 +1419,7 @@ extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); extern bool __jbd2_journal_refile_buffer(struct journal_head *); extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); -extern void __journal_free_buffer(struct journal_head *bh); extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); -extern void __journal_clean_data_list(transaction_t *transaction); static inline void jbd2_file_log_bh(struct list_head *head, struct buffer_head *bh) { list_add_tail(&bh->b_assoc_buffers, head); @@ -1486,9 +1484,6 @@ extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct buffer_head **bh_out, sector_t blocknr); -/* Transaction locking */ -extern void __wait_on_journal (journal_t *); - /* Transaction cache support */ extern void jbd2_journal_destroy_transaction_cache(void); extern int __init jbd2_journal_init_transaction_cache(void); @@ -1543,6 +1538,8 @@ extern int jbd2_journal_flush(journal_t *journal, unsigned int flags); extern void jbd2_journal_lock_updates (journal_t *); extern void jbd2_journal_unlock_updates (journal_t *); +void jbd2_journal_wait_updates(journal_t *); + extern journal_t * jbd2_journal_init_dev(struct block_device *bdev, struct block_device *fs_dev, unsigned long long start, int len, int bsize); @@ -1774,8 +1771,6 @@ static inline unsigned long jbd2_log_space_left(journal_t *journal) #define BJ_Reserved 4 /* Buffer is reserved for access by journal */ #define BJ_Types 5 -extern int jbd_blocks_per_page(struct inode *inode); - /* JBD uses a CRC32 checksum */ #define JBD_MAX_CHECKSUM_SIZE 4 diff --git a/include/linux/kfence.h b/include/linux/kfence.h index 4b5e3679a72c78caf5a6aa2c185658328403fa2f..f49e64222628ae3a1a15df5c3a111c9f434b135c 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -17,6 +17,8 @@ #include #include +extern unsigned long kfence_sample_interval; + /* * We allocate an even number of pages, as it simplifies calculations to map * address to metadata indices; effectively, the very first page serves as an diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 8c8f7a4d93afb96518c18da10c7e8f5bee62ef2d..19b884353b159ae9fefdee3ae42684ce9384dcb0 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -348,12 +348,6 @@ extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs); DEFINE_INSN_CACHE_OPS(optinsn); -#ifdef CONFIG_SYSCTL -extern int sysctl_kprobes_optimization; -extern int proc_kprobes_optimization_handler(struct ctl_table *table, - int write, void *buffer, - size_t *length, loff_t *ppos); -#endif /* CONFIG_SYSCTL */ extern void wait_for_kprobe_optimizer(void); #else /* !CONFIG_OPTPROBES */ static inline void 
wait_for_kprobe_optimizer(void) { } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d89d564f7c19a198677177e800e919aac927872d..f11039944c08ffd7d5bfe6675f95c8b9f2d3dd86 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -29,7 +29,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -309,9 +311,6 @@ struct kvm_vcpu { u64 requests; unsigned long guest_debug; - int pre_pcpu; - struct list_head blocked_vcpu_list; - struct mutex mutex; struct kvm_run *run; @@ -371,8 +370,11 @@ struct kvm_vcpu { u64 last_used_slot_gen; }; -/* must be called with irqs disabled */ -static __always_inline void guest_enter_irqoff(void) +/* + * Start accounting time towards a guest. + * Must be called before entering guest context. + */ +static __always_inline void guest_timing_enter_irqoff(void) { /* * This is running in ioctl context so its safe to assume that it's the @@ -381,7 +383,18 @@ static __always_inline void guest_enter_irqoff(void) instrumentation_begin(); vtime_account_guest_enter(); instrumentation_end(); +} +/* + * Enter guest context and enter an RCU extended quiescent state. + * + * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is + * unsafe to use any code which may directly or indirectly use RCU, tracing + * (including IRQ flag tracing), or lockdep. All code in this period must be + * non-instrumentable. + */ +static __always_inline void guest_context_enter_irqoff(void) +{ /* * KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode @@ -397,16 +410,79 @@ static __always_inline void guest_enter_irqoff(void) } } -static __always_inline void guest_exit_irqoff(void) +/* + * Deprecated. Architectures should move to guest_timing_enter_irqoff() and + * guest_state_enter_irqoff(). + */ +static __always_inline void guest_enter_irqoff(void) +{ + guest_timing_enter_irqoff(); + guest_context_enter_irqoff(); +} + +/** + * guest_state_enter_irqoff - Fixup state when entering a guest + * + * Entry to a guest will enable interrupts, but the kernel state is interrupts + * disabled when this is invoked. Also tell RCU about it. + * + * 1) Trace interrupts on state + * 2) Invoke context tracking if enabled to adjust RCU state + * 3) Tell lockdep that interrupts are enabled + * + * Invoked from architecture specific code before entering a guest. + * Must be called with interrupts disabled and the caller must be + * non-instrumentable. + * The caller has to invoke guest_timing_enter_irqoff() before this. + * + * Note: this is analogous to exit_to_user_mode(). + */ +static __always_inline void guest_state_enter_irqoff(void) +{ + instrumentation_begin(); + trace_hardirqs_on_prepare(); + lockdep_hardirqs_on_prepare(CALLER_ADDR0); + instrumentation_end(); + + guest_context_enter_irqoff(); + lockdep_hardirqs_on(CALLER_ADDR0); +} + +/* + * Exit guest context and exit an RCU extended quiescent state. + * + * Between guest_context_enter_irqoff() and guest_context_exit_irqoff() it is + * unsafe to use any code which may directly or indirectly use RCU, tracing + * (including IRQ flag tracing), or lockdep. All code in this period must be + * non-instrumentable. + */ +static __always_inline void guest_context_exit_irqoff(void) { context_tracking_guest_exit(); +} +/* + * Stop accounting time towards a guest. + * Must be called after exiting guest context. 
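+ *
+ * Resulting call ordering for an architecture's vcpu run loop (an
+ * illustrative sketch assembled from the ordering rules documented in
+ * these helpers, not code from this patch):
+ *
+ *	guest_timing_enter_irqoff();
+ *	guest_state_enter_irqoff();
+ *	... enter the guest ...
+ *	guest_state_exit_irqoff();
+ *	guest_timing_exit_irqoff();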
+ */ +static __always_inline void guest_timing_exit_irqoff(void) +{ instrumentation_begin(); /* Flush the guest cputime we spent on the guest */ vtime_account_guest_exit(); instrumentation_end(); } +/* + * Deprecated. Architectures should move to guest_state_exit_irqoff() and + * guest_timing_exit_irqoff(). + */ +static __always_inline void guest_exit_irqoff(void) +{ + guest_context_exit_irqoff(); + guest_timing_exit_irqoff(); +} + static inline void guest_exit(void) { unsigned long flags; @@ -416,6 +492,33 @@ static inline void guest_exit(void) local_irq_restore(flags); } +/** + * guest_state_exit_irqoff - Establish state when returning from guest mode + * + * Entry from a guest disables interrupts, but guest mode is traced as + * interrupts enabled. Also with NO_HZ_FULL RCU might be idle. + * + * 1) Tell lockdep that interrupts are disabled + * 2) Invoke context tracking if enabled to reactivate RCU + * 3) Trace interrupts off state + * + * Invoked from architecture specific code after exiting a guest. + * Must be invoked with interrupts disabled and the caller must be + * non-instrumentable. + * The caller has to invoke guest_timing_exit_irqoff() after this. + * + * Note: this is analogous to enter_from_user_mode(). + */ +static __always_inline void guest_state_exit_irqoff(void) +{ + lockdep_hardirqs_off(CALLER_ADDR0); + guest_context_exit_irqoff(); + + instrumentation_begin(); + trace_hardirqs_off_finish(); + instrumentation_end(); +} + static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) { /* diff --git a/include/linux/libata.h b/include/linux/libata.h index 605756f645bee8546c07489f8f86e922b77b7976..7f99b4d788223b67353b080b083bf642433bbf51 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -380,6 +380,7 @@ enum { ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ ATA_HORKAGE_NO_ID_DEV_LOG = (1 << 28), /* Identify device log missing */ + ATA_HORKAGE_NO_LOG_DIR = (1 << 29), /* Do not read log directory */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index a5a724c308d8d2198405ce67c04a2fd9122a6598..819ec92dc2a8255ee3914565a661ec21df1c4417 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -80,7 +80,7 @@ LSM_HOOK(int, 0, sb_clone_mnt_opts, const struct super_block *oldsb, unsigned long *set_kern_flags) LSM_HOOK(int, 0, move_mount, const struct path *from_path, const struct path *to_path) -LSM_HOOK(int, 0, dentry_init_security, struct dentry *dentry, +LSM_HOOK(int, -EOPNOTSUPP, dentry_init_security, struct dentry *dentry, int mode, const struct qstr *name, const char **xattr_name, void **ctx, u32 *ctxlen) LSM_HOOK(int, 0, dentry_create_files_as, struct dentry *dentry, int mode, diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b72d75141e125b50b777df27baebdcd938292d8b..0abbd685703b9ac9a52df62b13a7b3c7b1f2d49f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -219,7 +219,7 @@ struct obj_cgroup { struct mem_cgroup *memcg; atomic_t nr_charged_bytes; union { - struct list_head list; + struct list_head list; /* protected by objcg_lock */ struct rcu_head rcu; }; }; @@ -315,7 +315,8 @@ struct mem_cgroup { #ifdef CONFIG_MEMCG_KMEM int kmemcg_id; struct obj_cgroup __rcu *objcg; - struct list_head objcg_list; /* list of inherited objcgs */ + /* list of inherited objcgs, protected by objcg_lock 
*/ + struct list_head objcg_list; #endif MEMCG_PADDING(_pad2_); diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 4850cc5bf81386138bea122559e6d4d7e81e5a24..db96e10eb8da227728ae4daa362ef3247e7de6b9 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -40,6 +40,8 @@ extern int migrate_huge_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page); extern int migrate_page_move_mapping(struct address_space *mapping, struct page *newpage, struct page *page, int extra_count); +void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep, + spinlock_t *ptl); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); void folio_migrate_copy(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, diff --git a/include/linux/mm.h b/include/linux/mm.h index aa47705191bcdfa3342b8688a6dbcd6051944470..213cc569b19223c8738d980bf57ec0308a41de93 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1199,6 +1199,26 @@ static inline void folio_put(struct folio *folio) __put_page(&folio->page); } +/** + * folio_put_refs - Reduce the reference count on a folio. + * @folio: The folio. + * @refs: The amount to subtract from the folio's reference count. + * + * If the folio's reference count reaches zero, the memory will be + * released back to the page allocator and may be used by another + * allocation immediately. Do not access the memory or the struct folio + * after calling folio_put_refs() unless you can be sure that these weren't + * the last references. + * + * Context: May be called in process or interrupt context, but not in NMI + * context. May be called while holding a spinlock. + */ +static inline void folio_put_refs(struct folio *folio, int refs) +{ + if (folio_ref_sub_and_test(folio, refs)) + __put_page(&folio->page); +} + static inline void put_page(struct page *page) { struct folio *folio = page_folio(page); @@ -1486,11 +1506,18 @@ static inline u8 page_kasan_tag(const struct page *page) static inline void page_kasan_tag_set(struct page *page, u8 tag) { - if (kasan_enabled()) { - tag ^= 0xff; - page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); - page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; - } + unsigned long old_flags, flags; + + if (!kasan_enabled()) + return; + + tag ^= 0xff; + old_flags = READ_ONCE(page->flags); + do { + flags = old_flags; + flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); + flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; + } while (unlikely(!try_cmpxchg(&page->flags, &old_flags, flags))); } static inline void page_kasan_tag_reset(struct page *page) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9db36dc5d4cf8eb8ffa5f2ac5165566fbde68098..5140e5feb486613cdf42df80b4403c68480db8c6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -261,6 +261,7 @@ static_assert(sizeof(struct page) == sizeof(struct folio)); static_assert(offsetof(struct page, pg) == offsetof(struct folio, fl)) FOLIO_MATCH(flags, flags); FOLIO_MATCH(lru, lru); +FOLIO_MATCH(mapping, mapping); FOLIO_MATCH(compound_head, lru); FOLIO_MATCH(index, index); FOLIO_MATCH(private, private); diff --git a/include/linux/mount.h b/include/linux/mount.h index 5d92a7e1a742d6c91403854282779426f5bfcbb9..7f18a7555dff7299b864cb391290aca9f4e48fa8 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -113,9 +113,6 @@ extern void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list); extern void 
mark_mounts_for_expiry(struct list_head *mounts); extern dev_t name_to_dev_t(const char *name); - -extern unsigned int sysctl_mount_max; - extern bool path_is_mountpoint(const struct path *path); extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3213c7227b59bd4f18a57d90c848e415fcd3b253..e490b84732d1654bf067b30f2bb0b0825f88dea9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2548,6 +2548,7 @@ struct packet_type { struct net_device *); bool (*id_match)(struct packet_type *ptype, struct sock *sk); + struct net *af_packet_net; void *af_packet_priv; struct list_head list; }; diff --git a/include/linux/netfs.h b/include/linux/netfs.h index b46c39d98bbd2c1dfc513f13ab03ba56a2342297..614f22213e21709ea6eb4f6cdf424e2994047ed7 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -244,6 +244,13 @@ struct netfs_cache_ops { int (*prepare_write)(struct netfs_cache_resources *cres, loff_t *_start, size_t *_len, loff_t i_size, bool no_space_allocated_yet); + + /* Query the occupancy of the cache in a region, returning where the + * next chunk of data starts and how long it is. + */ + int (*query_occupancy)(struct netfs_cache_resources *cres, + loff_t start, size_t len, size_t granularity, + loff_t *_data_start, size_t *_data_len); }; struct readahead_control; diff --git a/include/linux/nfs.h b/include/linux/nfs.h index 0dc7ad38a0da4f4df118806c8f9cf01876b1bcd5..b06375e88e589a40e5b58775d2a586c949166378 100644 --- a/include/linux/nfs.h +++ b/include/linux/nfs.h @@ -36,14 +36,6 @@ static inline void nfs_copy_fh(struct nfs_fh *target, const struct nfs_fh *sourc memcpy(target->data, source->data, source->size); } - -/* - * This is really a general kernel constant, but since nothing like - * this is defined in the kernel headers, I have to do it here. 
- */ -#define NFS_OFFSET_MAX ((__s64)((~(__u64)0) >> 1)) - - enum nfs3_stable_how { NFS_UNSTABLE = 0, NFS_DATA_SYNC = 1, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 00835bacd236dd73f7eef4c800a30d925846b5e2..68f81d8d36def119a1c53215cb6d6ddef27526f1 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -61,7 +61,9 @@ struct nfs_access_entry { struct rb_node rb_node; struct list_head lru; - const struct cred * cred; + kuid_t fsuid; + kgid_t fsgid; + struct group_info *group_info; __u32 mask; struct rcu_head rcu_head; }; @@ -105,6 +107,7 @@ struct nfs_open_dir_context { __u64 dup_cookie; pgoff_t page_index; signed char duped; + bool eof; }; /* @@ -395,7 +398,7 @@ extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fa extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct user_namespace *, const struct path *, struct kstat *, u32, unsigned int); -extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); +extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *, const struct cred *); extern void nfs_access_set_mask(struct nfs_access_entry *, u32); extern int nfs_permission(struct user_namespace *, struct inode *, int); extern int nfs_open(struct inode *, struct file *); @@ -532,8 +535,8 @@ extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr); extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags); extern void nfs_access_zap_cache(struct inode *inode); -extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, - bool may_block); +extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, + u32 *mask, bool may_block); /* * linux/fs/nfs/symlink.c diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 77b2dba27bbb7572b2f0bee37db866246494a750..ca0959e51e817cfd5002d79200da255077f85811 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -266,6 +266,8 @@ struct nfs_server { #define NFS_CAP_ACLS (1U << 3) #define NFS_CAP_ATOMIC_OPEN (1U << 4) #define NFS_CAP_LGOPEN (1U << 5) +#define NFS_CAP_CASE_INSENSITIVE (1U << 6) +#define NFS_CAP_CASE_PRESERVING (1U << 7) #define NFS_CAP_POSIX_LOCK (1U << 14) #define NFS_CAP_UIDGID_NOMAP (1U << 15) #define NFS_CAP_STATEID_NFSV41 (1U << 16) @@ -282,5 +284,5 @@ struct nfs_server { #define NFS_CAP_COPY_NOTIFY (1U << 27) #define NFS_CAP_XATTR (1U << 28) #define NFS_CAP_READ_PLUS (1U << 29) - +#define NFS_CAP_FS_LOCATIONS (1U << 30) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 967a0098f0a9795a71d696c2464c007e9e8656a0..728cb0c1f0b6da7b274c8bdb560807956d63cd32 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1194,6 +1194,8 @@ struct nfs4_server_caps_res { u32 has_links; u32 has_symlinks; u32 fh_expire_type; + u32 case_insensitive; + u32 case_preserving; }; #define NFS4_PATHNAME_MAXCOMPONENTS 512 @@ -1737,7 +1739,7 @@ struct nfs_rpc_ops { struct nfs_fh *, struct nfs_fattr *); int (*lookupp) (struct inode *, struct nfs_fh *, struct nfs_fattr *); - int (*access) (struct inode *, struct nfs_access_entry *); + int (*access) (struct inode *, struct nfs_access_entry *, const struct cred *); int (*readlink)(struct inode *, struct page *, unsigned int, unsigned int); int (*create) (struct inode *, struct dentry *, @@ -1795,6 +1797,7 @@ struct nfs_rpc_ops { struct nfs_server 
*(*create_server)(struct fs_context *); struct nfs_server *(*clone_server)(struct nfs_server *, struct nfs_fh *, struct nfs_fattr *, rpc_authflavor_t); + int (*discover_trunking)(struct nfs_server *, struct nfs_fh *); }; /* diff --git a/include/linux/of.h b/include/linux/of.h index ff143a027abc4b2752d2457a280a39811ce32c0e..2dc77430a91abfe832ac87a7d503678a9b2f8c9b 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -364,18 +364,12 @@ extern const struct of_device_id *of_match_node( const struct of_device_id *matches, const struct device_node *node); extern int of_modalias_node(struct device_node *node, char *modalias, int len); extern void of_print_phandle_args(const char *msg, const struct of_phandle_args *args); -extern struct device_node *of_parse_phandle(const struct device_node *np, - const char *phandle_name, - int index); -extern int of_parse_phandle_with_args(const struct device_node *np, - const char *list_name, const char *cells_name, int index, - struct of_phandle_args *out_args); +extern int __of_parse_phandle_with_args(const struct device_node *np, + const char *list_name, const char *cells_name, int cell_count, + int index, struct of_phandle_args *out_args); extern int of_parse_phandle_with_args_map(const struct device_node *np, const char *list_name, const char *stem_name, int index, struct of_phandle_args *out_args); -extern int of_parse_phandle_with_fixed_args(const struct device_node *np, - const char *list_name, int cells_count, int index, - struct of_phandle_args *out_args); extern int of_count_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name); @@ -416,130 +410,6 @@ extern int of_detach_node(struct device_node *); #define of_match_ptr(_ptr) (_ptr) -/** - * of_property_read_u8_array - Find and read an array of u8 from a property. - * - * @np: device node from which the property value is to be read. - * @propname: name of the property to be searched. - * @out_values: pointer to return value, modified only if return value is 0. - * @sz: number of array elements to read - * - * Search for a property in a device node and read 8-bit value(s) from - * it. - * - * dts entry of array should be like: - * ``property = /bits/ 8 <0x50 0x60 0x70>;`` - * - * Return: 0 on success, -EINVAL if the property does not exist, - * -ENODATA if property does not have a value, and -EOVERFLOW if the - * property data isn't large enough. - * - * The out_values is modified only if a valid u8 value can be decoded. - */ -static inline int of_property_read_u8_array(const struct device_node *np, - const char *propname, - u8 *out_values, size_t sz) -{ - int ret = of_property_read_variable_u8_array(np, propname, out_values, - sz, 0); - if (ret >= 0) - return 0; - else - return ret; -} - -/** - * of_property_read_u16_array - Find and read an array of u16 from a property. - * - * @np: device node from which the property value is to be read. - * @propname: name of the property to be searched. - * @out_values: pointer to return value, modified only if return value is 0. - * @sz: number of array elements to read - * - * Search for a property in a device node and read 16-bit value(s) from - * it. - * - * dts entry of array should be like: - * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;`` - * - * Return: 0 on success, -EINVAL if the property does not exist, - * -ENODATA if property does not have a value, and -EOVERFLOW if the - * property data isn't large enough. - * - * The out_values is modified only if a valid u16 value can be decoded. 
- */ -static inline int of_property_read_u16_array(const struct device_node *np, - const char *propname, - u16 *out_values, size_t sz) -{ - int ret = of_property_read_variable_u16_array(np, propname, out_values, - sz, 0); - if (ret >= 0) - return 0; - else - return ret; -} - -/** - * of_property_read_u32_array - Find and read an array of 32 bit integers - * from a property. - * - * @np: device node from which the property value is to be read. - * @propname: name of the property to be searched. - * @out_values: pointer to return value, modified only if return value is 0. - * @sz: number of array elements to read - * - * Search for a property in a device node and read 32-bit value(s) from - * it. - * - * Return: 0 on success, -EINVAL if the property does not exist, - * -ENODATA if property does not have a value, and -EOVERFLOW if the - * property data isn't large enough. - * - * The out_values is modified only if a valid u32 value can be decoded. - */ -static inline int of_property_read_u32_array(const struct device_node *np, - const char *propname, - u32 *out_values, size_t sz) -{ - int ret = of_property_read_variable_u32_array(np, propname, out_values, - sz, 0); - if (ret >= 0) - return 0; - else - return ret; -} - -/** - * of_property_read_u64_array - Find and read an array of 64 bit integers - * from a property. - * - * @np: device node from which the property value is to be read. - * @propname: name of the property to be searched. - * @out_values: pointer to return value, modified only if return value is 0. - * @sz: number of array elements to read - * - * Search for a property in a device node and read 64-bit value(s) from - * it. - * - * Return: 0 on success, -EINVAL if the property does not exist, - * -ENODATA if property does not have a value, and -EOVERFLOW if the - * property data isn't large enough. - * - * The out_values is modified only if a valid u64 value can be decoded. 
- */ -static inline int of_property_read_u64_array(const struct device_node *np, - const char *propname, - u64 *out_values, size_t sz) -{ - int ret = of_property_read_variable_u64_array(np, propname, out_values, - sz, 0); - if (ret >= 0) - return 0; - else - return ret; -} - /* * struct property *prop; * const __be32 *p; @@ -734,32 +604,6 @@ static inline int of_property_count_elems_of_size(const struct device_node *np, return -ENOSYS; } -static inline int of_property_read_u8_array(const struct device_node *np, - const char *propname, u8 *out_values, size_t sz) -{ - return -ENOSYS; -} - -static inline int of_property_read_u16_array(const struct device_node *np, - const char *propname, u16 *out_values, size_t sz) -{ - return -ENOSYS; -} - -static inline int of_property_read_u32_array(const struct device_node *np, - const char *propname, - u32 *out_values, size_t sz) -{ - return -ENOSYS; -} - -static inline int of_property_read_u64_array(const struct device_node *np, - const char *propname, - u64 *out_values, size_t sz) -{ - return -ENOSYS; -} - static inline int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value) { @@ -865,18 +709,12 @@ static inline int of_property_read_string_helper(const struct device_node *np, return -ENOSYS; } -static inline struct device_node *of_parse_phandle(const struct device_node *np, - const char *phandle_name, - int index) -{ - return NULL; -} - -static inline int of_parse_phandle_with_args(const struct device_node *np, - const char *list_name, - const char *cells_name, - int index, - struct of_phandle_args *out_args) +static inline int __of_parse_phandle_with_args(const struct device_node *np, + const char *list_name, + const char *cells_name, + int cell_count, + int index, + struct of_phandle_args *out_args) { return -ENOSYS; } @@ -890,13 +728,6 @@ static inline int of_parse_phandle_with_args_map(const struct device_node *np, return -ENOSYS; } -static inline int of_parse_phandle_with_fixed_args(const struct device_node *np, - const char *list_name, int cells_count, int index, - struct of_phandle_args *out_args) -{ - return -ENOSYS; -} - static inline int of_count_phandle_with_args(const struct device_node *np, const char *list_name, const char *cells_name) @@ -1077,6 +908,117 @@ static inline bool of_node_is_type(const struct device_node *np, const char *typ return np && match && type && !strcmp(match, type); } +/** + * of_parse_phandle - Resolve a phandle property to a device_node pointer + * @np: Pointer to device node holding phandle property + * @phandle_name: Name of property holding a phandle value + * @index: For properties holding a table of phandles, this is the index into + * the table + * + * Return: The device_node pointer with refcount incremented. Use + * of_node_put() on it when done. 
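+ *
+ * Example (an illustrative sketch; ``interrupt-parent`` stands in for any
+ * phandle-valued property)::
+ *
+ *	struct device_node *parent;
+ *
+ *	parent = of_parse_phandle(np, "interrupt-parent", 0);
+ *	if (parent) {
+ *		... use the node ...
+ *		of_node_put(parent);
+ *	}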
+ */ +static inline struct device_node *of_parse_phandle(const struct device_node *np, + const char *phandle_name, + int index) +{ + struct of_phandle_args args; + + if (__of_parse_phandle_with_args(np, phandle_name, NULL, 0, + index, &args)) + return NULL; + + return args.np; +} + +/** + * of_parse_phandle_with_args() - Find a node pointed to by a phandle in a list + * @np: pointer to a device tree node containing a list + * @list_name: property name that contains a list + * @cells_name: property name that specifies phandles' arguments count + * @index: index of a phandle to parse out + * @out_args: optional pointer to output arguments structure (will be filled) + * + * This function is useful for parsing lists of phandles and their arguments. + * Returns 0 on success and fills out_args; on error, returns an appropriate + * errno value. + * + * The caller is responsible for calling of_node_put() on the returned out_args->np + * pointer. + * + * Example:: + * + * phandle1: node1 { + * #list-cells = <2>; + * }; + * + * phandle2: node2 { + * #list-cells = <1>; + * }; + * + * node3 { + * list = <&phandle1 1 2 &phandle2 3>; + * }; + * + * To get a device_node of the ``node2`` node you may call this: + * of_parse_phandle_with_args(node3, "list", "#list-cells", 1, &args); + */ +static inline int of_parse_phandle_with_args(const struct device_node *np, + const char *list_name, + const char *cells_name, + int index, + struct of_phandle_args *out_args) +{ + int cell_count = -1; + + /* If cells_name is NULL we assume a cell count of 0 */ + if (!cells_name) + cell_count = 0; + + return __of_parse_phandle_with_args(np, list_name, cells_name, + cell_count, index, out_args); +} + +/** + * of_parse_phandle_with_fixed_args() - Find a node pointed to by a phandle in a list + * @np: pointer to a device tree node containing a list + * @list_name: property name that contains a list + * @cell_count: number of argument cells following the phandle + * @index: index of a phandle to parse out + * @out_args: optional pointer to output arguments structure (will be filled) + * + * This function is useful for parsing lists of phandles and their arguments. + * Returns 0 on success and fills out_args; on error, returns an appropriate + * errno value. + * + * The caller is responsible for calling of_node_put() on the returned out_args->np + * pointer. + * + * Example:: + * + * phandle1: node1 { + * }; + * + * phandle2: node2 { + * }; + * + * node3 { + * list = <&phandle1 0 2 &phandle2 2 3>; + * }; + * + * To get a device_node of the ``node2`` node you may call this: + * of_parse_phandle_with_fixed_args(node3, "list", 2, 1, &args); + */ +static inline int of_parse_phandle_with_fixed_args(const struct device_node *np, + const char *list_name, + int cell_count, + int index, + struct of_phandle_args *out_args) +{ + return __of_parse_phandle_with_args(np, list_name, NULL, cell_count, + index, out_args); +} + /** * of_property_count_u8_elems - Count the number of u8 elements in a property * @@ -1236,6 +1178,130 @@ static inline bool of_property_read_bool(const struct device_node *np, return prop ? true : false; } +/** + * of_property_read_u8_array - Find and read an array of u8 from a property. + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @out_values: pointer to return value, modified only if return value is 0. + * @sz: number of array elements to read + * + * Search for a property in a device node and read 8-bit value(s) from + * it.
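+ *
+ * A call might look like this (a sketch; ``vendor,init-seq`` is a made-up
+ * property name)::
+ *
+ *	u8 seq[3];
+ *	int err = of_property_read_u8_array(np, "vendor,init-seq", seq, 3);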
+ * + * dts entry of array should be like: + * ``property = /bits/ 8 <0x50 0x60 0x70>;`` + * + * Return: 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. + * + * The out_values is modified only if a valid u8 value can be decoded. + */ +static inline int of_property_read_u8_array(const struct device_node *np, + const char *propname, + u8 *out_values, size_t sz) +{ + int ret = of_property_read_variable_u8_array(np, propname, out_values, + sz, 0); + if (ret >= 0) + return 0; + else + return ret; +} + +/** + * of_property_read_u16_array - Find and read an array of u16 from a property. + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @out_values: pointer to return value, modified only if return value is 0. + * @sz: number of array elements to read + * + * Search for a property in a device node and read 16-bit value(s) from + * it. + * + * dts entry of array should be like: + * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;`` + * + * Return: 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. + * + * The out_values is modified only if a valid u16 value can be decoded. + */ +static inline int of_property_read_u16_array(const struct device_node *np, + const char *propname, + u16 *out_values, size_t sz) +{ + int ret = of_property_read_variable_u16_array(np, propname, out_values, + sz, 0); + if (ret >= 0) + return 0; + else + return ret; +} + +/** + * of_property_read_u32_array - Find and read an array of 32 bit integers + * from a property. + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @out_values: pointer to return value, modified only if return value is 0. + * @sz: number of array elements to read + * + * Search for a property in a device node and read 32-bit value(s) from + * it. + * + * Return: 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. + * + * The out_values is modified only if a valid u32 value can be decoded. + */ +static inline int of_property_read_u32_array(const struct device_node *np, + const char *propname, + u32 *out_values, size_t sz) +{ + int ret = of_property_read_variable_u32_array(np, propname, out_values, + sz, 0); + if (ret >= 0) + return 0; + else + return ret; +} + +/** + * of_property_read_u64_array - Find and read an array of 64 bit integers + * from a property. + * + * @np: device node from which the property value is to be read. + * @propname: name of the property to be searched. + * @out_values: pointer to return value, modified only if return value is 0. + * @sz: number of array elements to read + * + * Search for a property in a device node and read 64-bit value(s) from + * it. + * + * Return: 0 on success, -EINVAL if the property does not exist, + * -ENODATA if property does not have a value, and -EOVERFLOW if the + * property data isn't large enough. + * + * The out_values is modified only if a valid u64 value can be decoded. 
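+ *
+ * For example (a sketch; ``vendor,dma-window`` is a made-up property name)::
+ *
+ *	u64 win[2];
+ *	int err = of_property_read_u64_array(np, "vendor,dma-window", win, 2);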
+ */ +static inline int of_property_read_u64_array(const struct device_node *np, + const char *propname, + u64 *out_values, size_t sz) +{ + int ret = of_property_read_variable_u64_array(np, propname, out_values, + sz, 0); + if (ret >= 0) + return 0; + else + return ret; +} + static inline int of_property_read_u8(const struct device_node *np, const char *propname, u8 *out_value) diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h index 38cace1da7b69ebfe7c4dbac2c3a121e40dc0e77..01e16c7696ec98a1d5ffed2417b33d09306b1c41 100644 --- a/include/linux/page_table_check.h +++ b/include/linux/page_table_check.h @@ -26,6 +26,9 @@ void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud); +void __page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd); static inline void page_table_check_alloc(struct page *page, unsigned int order) { @@ -100,6 +103,16 @@ static inline void page_table_check_pud_set(struct mm_struct *mm, __page_table_check_pud_set(mm, addr, pudp, pud); } +static inline void page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd) +{ + if (static_branch_likely(&page_table_check_disabled)) + return; + + __page_table_check_pte_clear_range(mm, addr, pmd); +} + #else static inline void page_table_check_alloc(struct page *page, unsigned int order) @@ -143,5 +156,11 @@ static inline void page_table_check_pud_set(struct mm_struct *mm, { } +static inline void page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd) +{ +} + #endif /* CONFIG_PAGE_TABLE_CHECK */ #endif /* __LINUX_PAGE_TABLE_CHECK_H */ diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index dda8d5868c8163ba07f438b586dc6cb516acd7f6..67b1246f136b425f950e961cb65c336125509c19 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -111,6 +111,7 @@ static_assert(offsetof(struct pagevec, pages) == static inline void folio_batch_init(struct folio_batch *fbatch) { fbatch->nr = 0; + fbatch->percpu_pvec_drained = false; } static inline unsigned int folio_batch_count(struct folio_batch *fbatch) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 117f230bcdfd80076918e8534169dc55c74e6ef8..733649184b27906c9dee135c03c33548aff13d2e 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -693,18 +693,6 @@ struct perf_event { u64 total_time_running; u64 tstamp; - /* - * timestamp shadows the actual context timing but it can - * be safely used in NMI interrupt context. It reflects the - * context time as it was when the event was last scheduled in, - * or when ctx_sched_in failed to schedule the event because we - * run out of PMC. - * - * ctx_time already accounts for ctx->timestamp. Therefore to - * compute ctx_time for a sample, simply add perf_clock(). 
- */ - u64 shadow_ctx_time; - struct perf_event_attr attr; u16 header_size; u16 id_header_size; @@ -852,6 +840,7 @@ struct perf_event_context { */ u64 time; u64 timestamp; + u64 timeoffset; /* * These fields let us detect when two contexts have both @@ -934,6 +923,8 @@ struct bpf_perf_event_data_kern { struct perf_cgroup_info { u64 time; u64 timestamp; + u64 timeoffset; + int active; }; struct perf_cgroup { diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index bc8713a76e0343ac3760eb76ce9dc5bbdd5b36d6..f4f4077b97aab42bcc33797716a155decab047eb 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -62,6 +62,7 @@ static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); } +#define pte_index pte_index #ifndef pmd_index static inline unsigned long pmd_index(unsigned long address) diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 7c7e627503d228aef991a71b86d434b0900a6033..07481bb87d4e0c40a8e6f0b289e6d30486d2b75b 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -86,4 +86,9 @@ extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); void pidhash_init(void); void pid_idr_init(void); +static inline bool task_is_in_init_pid_ns(struct task_struct *tsk) +{ + return task_active_pid_ns(tsk) == &init_pid_ns; +} + #endif /* _LINUX_PID_NS_H */ diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index fc5642431b923cd8c1bf33d884ad348a43924377..c00c618ef290d726874d18c00c174e71d215a4ae 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -238,10 +238,6 @@ void pipe_lock(struct pipe_inode_info *); void pipe_unlock(struct pipe_inode_info *); void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); -extern unsigned int pipe_max_size; -extern unsigned long pipe_user_pages_hard; -extern unsigned long pipe_user_pages_soft; - /* Wait for a pipe to be readable/writable while dropping the pipe lock */ void pipe_wait_readable(struct pipe_inode_info *); void pipe_wait_writable(struct pipe_inode_info *); diff --git a/include/linux/poll.h b/include/linux/poll.h index 1cdc32b1f1b0836e6dccf2ecf446b9aeab0a950b..a9e0e1c2d1f2ff89d99a9377a1de7d0dc78483cd 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -8,12 +8,10 @@ #include #include #include -#include #include #include #include -extern struct ctl_table epoll_table[]; /* for sysctl */ /* ~832 bytes of stack space used max in sys_select/sys_poll before allocating additional memory. 
*/ #ifdef __clang__ diff --git a/include/linux/printk.h b/include/linux/printk.h index 9497f6b983399ac0cf6a21d5e4b57f9e144e2d74..1522df223c0f7650a80cd42b1c43fedb14c8a8c7 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -183,10 +183,6 @@ extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, extern int printk_delay_msec; extern int dmesg_restrict; -extern int -devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void *buf, - size_t *lenp, loff_t *ppos); - extern void wake_up_klogd(void); char *log_buf_addr_get(void); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 01b9268451a8ecc71350d670950d8f91b810ccfb..81d6e4ec2294b5576ca973d98d0d61e78a215838 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -110,7 +110,16 @@ extern struct proc_dir_entry *proc_create_data(const char *, umode_t, struct proc_dir_entry *proc_create(const char *name, umode_t mode, struct proc_dir_entry *parent, const struct proc_ops *proc_ops); extern void proc_set_size(struct proc_dir_entry *, loff_t); extern void proc_set_user(struct proc_dir_entry *, kuid_t, kgid_t); -extern void *PDE_DATA(const struct inode *); + +/* + * Obtain the private data passed by user through proc_create_data() or + * related. + */ +static inline void *pde_data(const struct inode *inode) +{ + return inode->i_private; +} + extern void *proc_get_parent_data(const struct inode *); extern void proc_remove(struct proc_dir_entry *); extern void remove_proc_entry(const char *, struct proc_dir_entry *); @@ -191,7 +200,7 @@ proc_create_data(const char *name, umode_t mode, struct proc_dir_entry *parent, static inline void proc_set_size(struct proc_dir_entry *de, loff_t size) {} static inline void proc_set_user(struct proc_dir_entry *de, kuid_t uid, kgid_t gid) {} -static inline void *PDE_DATA(const struct inode *inode) {BUG(); return NULL;} +static inline void *pde_data(const struct inode *inode) {BUG(); return NULL;} static inline void *proc_get_parent_data(const struct inode *inode) { BUG(); return NULL; } static inline void proc_remove(struct proc_dir_entry *de) {} diff --git a/include/linux/psi.h b/include/linux/psi.h index a70ca833c6d778cbc8992993c922227538b808ec..7f7d1d88c3bbd5a36477b974a348366e9e3d5544 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -25,18 +25,17 @@ void psi_memstall_enter(unsigned long *flags); void psi_memstall_leave(unsigned long *flags); int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); - -#ifdef CONFIG_CGROUPS -int psi_cgroup_alloc(struct cgroup *cgrp); -void psi_cgroup_free(struct cgroup *cgrp); -void cgroup_move_task(struct task_struct *p, struct css_set *to); - struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, size_t nbytes, enum psi_res res); -void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t); +void psi_trigger_destroy(struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait); + +#ifdef CONFIG_CGROUPS +int psi_cgroup_alloc(struct cgroup *cgrp); +void psi_cgroup_free(struct cgroup *cgrp); +void cgroup_move_task(struct task_struct *p, struct css_set *to); #endif #else /* CONFIG_PSI */ diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 516c0fe836fd5be6b17f29c73ffcac892bf9addc..1a3cef26d129d5fbc33a4f9dea4be54eb44600e2 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -141,9 +141,6 @@ struct psi_trigger { * events to one per window */ u64 last_event_time; - - /* Refcounting 
to prevent premature destruction */ - struct kref refcount; }; struct psi_group { diff --git a/include/linux/quota.h b/include/linux/quota.h index 18ebd39c948713aeb52a18719fb8e87807250805..fd692b4a41d5fbe0dae06b46da3ebc47c4747b2e 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -91,7 +91,7 @@ extern bool qid_valid(struct kqid qid); * * When there is no mapping defined for the user-namespace, type, * qid tuple an invalid kqid is returned. Callers are expected to - * test for and handle handle invalid kqids being returned. + * test for and handle invalid kqids being returned. * Invalid kqids may be tested for using qid_valid(). */ static inline struct kqid make_kqid(struct user_namespace *from, diff --git a/include/linux/ref_tracker.h b/include/linux/ref_tracker.h index c11c9db5825cf933acf529c83db441a818135f29..60f3453be23e6881725d383c55f93143fda1e7a2 100644 --- a/include/linux/ref_tracker.h +++ b/include/linux/ref_tracker.h @@ -4,6 +4,7 @@ #include #include #include +#include struct ref_tracker; @@ -26,6 +27,7 @@ static inline void ref_tracker_dir_init(struct ref_tracker_dir *dir, spin_lock_init(&dir->lock); dir->quarantine_avail = quarantine_count; refcount_set(&dir->untracked, 1); + stack_depot_init(); } void ref_tracker_dir_exit(struct ref_tracker_dir *dir); diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 2c0ad417ce3c034b40f04157a062269c130a0f36..8f416c5e929ea865417b426079cc7c76eb47ec42 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -55,6 +55,12 @@ do { \ #define write_lock(lock) _raw_write_lock(lock) #define read_lock(lock) _raw_read_lock(lock) +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#define write_lock_nested(lock, subclass) _raw_write_lock_nested(lock, subclass) +#else +#define write_lock_nested(lock, subclass) _raw_write_lock(lock) +#endif + #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) #define read_lock_irqsave(lock, flags) \ diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h index f1db6f17c4fba4c8b96f8ddb6ded8e25235c64a8..dceb0a59b6927e1052fde9ae68cdeb9f586f94e8 100644 --- a/include/linux/rwlock_api_smp.h +++ b/include/linux/rwlock_api_smp.h @@ -17,6 +17,7 @@ void __lockfunc _raw_read_lock(rwlock_t *lock) __acquires(lock); void __lockfunc _raw_write_lock(rwlock_t *lock) __acquires(lock); +void __lockfunc _raw_write_lock_nested(rwlock_t *lock, int subclass) __acquires(lock); void __lockfunc _raw_read_lock_bh(rwlock_t *lock) __acquires(lock); void __lockfunc _raw_write_lock_bh(rwlock_t *lock) __acquires(lock); void __lockfunc _raw_read_lock_irq(rwlock_t *lock) __acquires(lock); @@ -209,6 +210,13 @@ static inline void __raw_write_lock(rwlock_t *lock) LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock); } +static inline void __raw_write_lock_nested(rwlock_t *lock, int subclass) +{ + preempt_disable(); + rwlock_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock); +} + #endif /* !CONFIG_GENERIC_LOCKBREAK || CONFIG_DEBUG_LOCK_ALLOC */ static inline void __raw_write_unlock(rwlock_t *lock) diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h index 49c1f3842ed5b706c9507721f2a6151e55c71105..8544ff05e594d7f77d2a2f02640d64ffee6ed52f 100644 --- a/include/linux/rwlock_rt.h +++ b/include/linux/rwlock_rt.h @@ -28,6 +28,7 @@ extern void rt_read_lock(rwlock_t *rwlock); extern int rt_read_trylock(rwlock_t *rwlock); extern void rt_read_unlock(rwlock_t *rwlock); extern void rt_write_lock(rwlock_t *rwlock); +extern void 
rt_write_lock_nested(rwlock_t *rwlock, int subclass); extern int rt_write_trylock(rwlock_t *rwlock); extern void rt_write_unlock(rwlock_t *rwlock); @@ -83,6 +84,15 @@ static __always_inline void write_lock(rwlock_t *rwlock) rt_write_lock(rwlock); } +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static __always_inline void write_lock_nested(rwlock_t *rwlock, int subclass) +{ + rt_write_lock_nested(rwlock, subclass); +} +#else +#define write_lock_nested(lock, subclass) rt_write_lock(((void)(subclass), (lock))) +#endif + static __always_inline void write_lock_bh(rwlock_t *rwlock) { local_bh_disable(); diff --git a/include/linux/sched.h b/include/linux/sched.h index 508b91d574706814420b3c5e0a6d74861486cb14..75ba8aa60248b57be7c82090346bb5057e39ef95 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -619,10 +619,6 @@ struct sched_dl_entity { * task has to wait for a replenishment to be performed at the * next firing of dl_timer. * - * @dl_boosted tells if we are boosted due to DI. If so we are - * outside bandwidth enforcement mechanism (but only until we - * exit the critical section); - * * @dl_yielded tells if task gave up the CPU before consuming * all its available runtime during the last job. * @@ -1684,7 +1680,6 @@ extern struct pid *cad_pid; #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ #define PF_KSWAPD 0x00020000 /* I am kswapd */ diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 304f431178fd99515edb445297006b96c98b4e8c..c19dd5a2c05c679ad7d47f6edd4d9ac27af330d0 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -7,20 +7,8 @@ struct ctl_table; #ifdef CONFIG_DETECT_HUNG_TASK - -#ifdef CONFIG_SMP -extern unsigned int sysctl_hung_task_all_cpu_backtrace; -#else -#define sysctl_hung_task_all_cpu_backtrace 0 -#endif /* CONFIG_SMP */ - -extern int sysctl_hung_task_check_count; -extern unsigned int sysctl_hung_task_panic; +/* used for hung_task and block/ */ extern unsigned long sysctl_hung_task_timeout_secs; -extern unsigned long sysctl_hung_task_check_interval_secs; -extern int sysctl_hung_task_warnings; -int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); #else /* Avoid need for ifdefs elsewhere in the code */ enum { sysctl_hung_task_timeout_secs = 0 }; diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 72dbb44a4573b77c617cde0e5e75652240501d41..88cc16444b43125e5fbd274868013fe29869e465 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -209,7 +209,7 @@ static const struct file_operations __name ## _fops = { \ #define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ - return single_open(file, __name ## _show, PDE_DATA(inode)); \ + return single_open(file, __name ## _show, pde_data(inode)); \ } \ \ static const struct proc_ops __name ## _proc_ops = { \ diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 166158b6e917ac6575236c5618670808a88b9b86..e65b80ed09e771c6eb471dbbc090a4c75c6025d7 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -83,8 
+83,7 @@ extern void shmem_unlock_mapping(struct address_space *mapping); extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); -extern int shmem_unuse(unsigned int type, bool frontswap, - unsigned long *fs_pages_to_unuse); +int shmem_unuse(unsigned int type); extern bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, pgoff_t index); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bf11e1fbd69b65660ca217c5d22623a9b7046e9c..8a636e678902db73ef95a1ce3effc8d3d160abb2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -318,7 +318,7 @@ enum skb_drop_reason { SKB_DROP_REASON_NO_SOCKET, SKB_DROP_REASON_PKT_TOO_SMALL, SKB_DROP_REASON_TCP_CSUM, - SKB_DROP_REASON_TCP_FILTER, + SKB_DROP_REASON_SOCKET_FILTER, SKB_DROP_REASON_UDP_CSUM, SKB_DROP_REASON_MAX, }; diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h index d0d188861ad69a1b574ea6ce96fb8dfaa6a59c3d..b8ba00ccccdebd44e7ad723c66e772e434e73aa8 100644 --- a/include/linux/spinlock_api_up.h +++ b/include/linux/spinlock_api_up.h @@ -59,6 +59,7 @@ #define _raw_spin_lock_nested(lock, subclass) __LOCK(lock) #define _raw_read_lock(lock) __LOCK(lock) #define _raw_write_lock(lock) __LOCK(lock) +#define _raw_write_lock_nested(lock, subclass) __LOCK(lock) #define _raw_spin_lock_bh(lock) __LOCK_BH(lock) #define _raw_read_lock_bh(lock) __LOCK_BH(lock) #define _raw_write_lock_bh(lock) __LOCK_BH(lock) diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index c34b55a6e5540c69929314c5c0c8e3ae29b7c6c7..17f992fe6355b6d6c8abd0c863a8aa8708888b0f 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -19,6 +19,22 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags, bool can_alloc); +/* + * Every user of stack depot has to call this during its own init when it's + * decided that it will be calling stack_depot_save() later. + * + * The alternative is to select STACKDEPOT_ALWAYS_INIT to have stack depot + * enabled as part of mm_init(), for subsystems where it's known at compile time + * that stack depot will be used. 
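+ *
+ * A typical opt-in looks roughly like this (an illustrative sketch;
+ * my_tracker_init() is a made-up initializer)::
+ *
+ *	static int __init my_tracker_init(void)
+ *	{
+ *		stack_depot_init();
+ *		return 0;
+ *	}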
+ */ +int stack_depot_init(void); + +#ifdef CONFIG_STACKDEPOT_ALWAYS_INIT +static inline int stack_depot_early_init(void) { return stack_depot_init(); } +#else +static inline int stack_depot_early_init(void) { return 0; } +#endif + depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); @@ -30,13 +46,4 @@ int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, void stack_depot_print(depot_stack_handle_t stack); -#ifdef CONFIG_STACKDEPOT -int stack_depot_init(void); -#else -static inline int stack_depot_init(void) -{ - return 0; -} -#endif /* CONFIG_STACKDEPOT */ - #endif diff --git a/include/linux/stackleak.h b/include/linux/stackleak.h index a59db2f08e76b7ea6804fdccaa03c3e817b9a9ea..ccaab2043fcd513febda43f1a1dc61d5062791ef 100644 --- a/include/linux/stackleak.h +++ b/include/linux/stackleak.h @@ -23,11 +23,6 @@ static inline void stackleak_task_init(struct task_struct *t) # endif } -#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE -int stack_erasing_sysctl(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); -#endif - #else /* !CONFIG_GCC_PLUGIN_STACKLEAK */ static inline void stackleak_task_init(struct task_struct *t) { } #endif diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 5785d909c32150e113bfacb15d83f9b4ef170232..300273ff40cc1c8f39d6f44472314a6c8e4229d9 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -430,15 +430,7 @@ struct platform_hibernation_ops { #ifdef CONFIG_HIBERNATION /* kernel/power/snapshot.c */ -extern void __register_nosave_region(unsigned long b, unsigned long e, int km); -static inline void __init register_nosave_region(unsigned long b, unsigned long e) -{ - __register_nosave_region(b, e, 0); -} -static inline void __init register_nosave_region_late(unsigned long b, unsigned long e) -{ - __register_nosave_region(b, e, 1); -} +extern void register_nosave_region(unsigned long b, unsigned long e); extern int swsusp_page_is_forbidden(struct page *); extern void swsusp_set_page_free(struct page *); extern void swsusp_unset_page_free(struct page *); @@ -458,7 +450,6 @@ int pfn_is_nosave(unsigned long pfn); int hibernate_quiet_exec(int (*func)(void *data), void *data); #else /* CONFIG_HIBERNATION */ static inline void register_nosave_region(unsigned long b, unsigned long e) {} -static inline void register_nosave_region_late(unsigned long b, unsigned long e) {} static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} static inline void swsusp_unset_page_free(struct page *p) {} @@ -506,14 +497,14 @@ extern void ksys_sync_helper(void); /* drivers/base/power/wakeup.c */ extern bool events_check_enabled; -extern unsigned int pm_wakeup_irq; extern suspend_state_t pm_suspend_target_state; extern bool pm_wakeup_pending(void); extern void pm_system_wakeup(void); extern void pm_system_cancel_wakeup(void); -extern void pm_wakeup_clear(bool reset); +extern void pm_wakeup_clear(unsigned int irq_number); extern void pm_system_irq_wakeup(unsigned int irq_number); +extern unsigned int pm_wakeup_irq(void); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h index e06febf629788b10833ac1b4be94001357b3ca8c..54078542134c137c06867a10b14aab6ba40fbe3a 100644 --- a/include/linux/swapfile.h +++ 
b/include/linux/swapfile.h @@ -6,10 +6,7 @@ * these were static in swapfile.c but frontswap.c needs them and we don't * want to expose them to the dozens of source files that include swap.h */ -extern spinlock_t swap_lock; -extern struct plist_head swap_active_head; extern struct swap_info_struct *swap_info[]; -extern int try_to_unuse(unsigned int, bool, unsigned long); extern unsigned long generic_max_swapfile_size(void); extern unsigned long max_swapfile_size(void); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 1fa2b69c6fc3d23d52a848eaefa391506249db46..6353d6db69b2eddc767c4ccc51e7c4fa3b44690e 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -38,12 +38,28 @@ struct ctl_table_header; struct ctl_dir; /* Keep the same order as in fs/proc/proc_sysctl.c */ -#define SYSCTL_ZERO ((void *)&sysctl_vals[0]) -#define SYSCTL_ONE ((void *)&sysctl_vals[1]) -#define SYSCTL_INT_MAX ((void *)&sysctl_vals[2]) +#define SYSCTL_NEG_ONE ((void *)&sysctl_vals[0]) +#define SYSCTL_ZERO ((void *)&sysctl_vals[1]) +#define SYSCTL_ONE ((void *)&sysctl_vals[2]) +#define SYSCTL_TWO ((void *)&sysctl_vals[3]) +#define SYSCTL_FOUR ((void *)&sysctl_vals[4]) +#define SYSCTL_ONE_HUNDRED ((void *)&sysctl_vals[5]) +#define SYSCTL_TWO_HUNDRED ((void *)&sysctl_vals[6]) +#define SYSCTL_ONE_THOUSAND ((void *)&sysctl_vals[7]) +#define SYSCTL_THREE_THOUSAND ((void *)&sysctl_vals[8]) +#define SYSCTL_INT_MAX ((void *)&sysctl_vals[9]) + +/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ +#define SYSCTL_MAXOLDUID ((void *)&sysctl_vals[10]) extern const int sysctl_vals[]; +#define SYSCTL_LONG_ZERO ((void *)&sysctl_long_vals[0]) +#define SYSCTL_LONG_ONE ((void *)&sysctl_long_vals[1]) +#define SYSCTL_LONG_MAX ((void *)&sysctl_long_vals[2]) + +extern const unsigned long sysctl_long_vals[]; + typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); @@ -178,6 +194,20 @@ struct ctl_path { #ifdef CONFIG_SYSCTL +#define DECLARE_SYSCTL_BASE(_name, _table) \ +static struct ctl_table _name##_base_table[] = { \ + { \ + .procname = #_name, \ + .mode = 0555, \ + .child = _table, \ + }, \ + { }, \ +} + +extern int __register_sysctl_base(struct ctl_table *base_table); + +#define register_sysctl_base(_name) __register_sysctl_base(_name##_base_table) + void proc_sys_poll_notify(struct ctl_table_poll *poll); extern void setup_sysctl_set(struct ctl_table_set *p, @@ -198,8 +228,19 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path, void unregister_sysctl_table(struct ctl_table_header * table); -extern int sysctl_init(void); +extern int sysctl_init_bases(void); +extern void __register_sysctl_init(const char *path, struct ctl_table *table, + const char *table_name); +#define register_sysctl_init(path, table) __register_sysctl_init(path, table, #table) +extern struct ctl_table_header *register_sysctl_mount_point(const char *path); + void do_sysctl_args(void); +int do_proc_douintvec(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos, + int (*conv)(unsigned long *lvalp, + unsigned int *valp, + int write, void *data), + void *data); extern int pwrsw_enabled; extern int unaligned_enabled; @@ -207,16 +248,28 @@ extern int unaligned_dump_stack; extern int no_unaligned_warning; extern struct ctl_table sysctl_mount_point[]; -extern struct ctl_table random_table[]; -extern struct ctl_table firmware_config_table[]; -extern struct ctl_table epoll_table[]; #else /* CONFIG_SYSCTL */ + +#define 
DECLARE_SYSCTL_BASE(_name, _table) + +static inline int __register_sysctl_base(struct ctl_table *base_table) +{ + return 0; +} + +#define register_sysctl_base(table) __register_sysctl_base(table) + static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) { return NULL; } +static inline struct ctl_table_header *register_sysctl_mount_point(const char *path) +{ + return NULL; +} + static inline struct ctl_table_header *register_sysctl_paths( const struct ctl_path *path, struct ctl_table *table) { diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h index 031f148ab37349d2f6c10841498cdf300d7dcd78..b5deafd91f67bb58f04c4c9189bf37b1fcfe28f1 100644 --- a/include/linux/usb/role.h +++ b/include/linux/usb/role.h @@ -91,6 +91,12 @@ fwnode_usb_role_switch_get(struct fwnode_handle *node) static inline void usb_role_switch_put(struct usb_role_switch *sw) { } +static inline struct usb_role_switch * +usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode) +{ + return NULL; +} + static inline struct usb_role_switch * usb_role_switch_register(struct device *parent, const struct usb_role_switch_desc *desc) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 78ea3e332688f9985336c25d943f24345fdbc083..e7ce719838b5eca5d1824170e33fd6d176d9b983 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -6,6 +6,8 @@ #define RTR_SOLICITATION_INTERVAL (4*HZ) #define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */ +#define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ + #define TEMP_VALID_LIFETIME (7*86400) #define TEMP_PREFERRED_LIFETIME (86400) #define REGEN_MAX_RETRY (3) diff --git a/include/net/ax25.h b/include/net/ax25.h index 526e49589197909b459de068222c4a9cf76050ba..8221af1811df5ba24403b2548b7f85aa5df6ab8c 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -239,6 +239,7 @@ typedef struct ax25_dev { #if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER) ax25_dama_info dama; #endif + refcount_t refcount; } ax25_dev; typedef struct ax25_cb { @@ -293,6 +294,17 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) } } +static inline void ax25_dev_hold(ax25_dev *ax25_dev) +{ + refcount_inc(&ax25_dev->refcount); +} + +static inline void ax25_dev_put(ax25_dev *ax25_dev) +{ + if (refcount_dec_and_test(&ax25_dev->refcount)) { + kfree(ax25_dev); + } +} static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->dev = dev; diff --git a/include/net/bonding.h b/include/net/bonding.h index f6ae3a4baea40dea1e52b9424956b0e18c22c873..83cfd2d70247dbb7162e23aad3bb8c757f8959a1 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -346,7 +346,7 @@ static inline bool bond_uses_primary(struct bonding *bond) static inline struct net_device *bond_option_active_slave_get_rcu(struct bonding *bond) { - struct slave *slave = rcu_dereference(bond->curr_active_slave); + struct slave *slave = rcu_dereference_rtnl(bond->curr_active_slave); return bond_uses_primary(bond) && slave ? 
slave->dev : NULL; } diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 14efa0ded75dd93d6f63735a95feaf72de6fdef7..adab27ba1ecbf054f5284e4342b1551b104d9a2c 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -123,8 +123,20 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, sizeof(struct ip_tunnel_info) + md_size); +#ifdef CONFIG_DST_CACHE + /* Unclone the dst cache if there is one */ + if (new_md->u.tun_info.dst_cache.cache) { + int ret; + + ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC); + if (ret) { + metadata_dst_free(new_md); + return ERR_PTR(ret); + } + } +#endif + skb_dst_drop(skb); - dst_hold(&new_md->dst); skb_dst_set(skb, &new_md->dst); return new_md; } diff --git a/include/net/ip.h b/include/net/ip.h index 81e23a102a0d5edec859b78239b81e6dcd82c54d..b51bae43b0ddb00735a09718530aa3fff4a04872 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -525,19 +525,18 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, { struct iphdr *iph = ip_hdr(skb); + /* We had many attacks based on IPID, use the private + * generator as much as we can. + */ + if (sk && inet_sk(sk)->inet_daddr) { + iph->id = htons(inet_sk(sk)->inet_id); + inet_sk(sk)->inet_id += segs; + return; + } if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { - /* This is only to work around buggy Windows95/2000 - * VJ compression implementations. If the ID field - * does not change, they drop every other packet in - * a TCP stream using header compression. - */ - if (sk && inet_sk(sk)->inet_daddr) { - iph->id = htons(inet_sk(sk)->inet_id); - inet_sk(sk)->inet_id += segs; - } else { - iph->id = 0; - } + iph->id = 0; } else { + /* Unfortunately we need the big hammer to get a suitable IPID */ __ip_select_ident(net, iph, segs); } } diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index a9a4ccc0cdb5fd637094b0b5533a109ee3f60702..40ae8f1b18e502cece9f11b3a60c9cc30c2e1a5e 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -282,7 +282,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, fn = rcu_dereference(f6i->fib6_node); if (fn) { - *cookie = fn->fn_sernum; + *cookie = READ_ONCE(fn->fn_sernum); /* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */ smp_rmb(); status = true; diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 937389e04c8e26b6f5a4c1362a90930145671889..87419f7f5421071e81915ed5423983decacadf02 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -350,7 +350,8 @@ static inline struct neighbour *neigh_create(struct neigh_table *tbl, return __neigh_create(tbl, pkey, dev, true); } void neigh_destroy(struct neighbour *neigh); -int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb); +int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, + const bool immediate_ok); int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u32 nlmsg_pid); void __neigh_set_probe_once(struct neighbour *neigh); @@ -460,17 +461,24 @@ static inline struct neighbour * neigh_clone(struct neighbour *neigh) #define neigh_hold(n) refcount_inc(&(n)->refcnt) -static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +static __always_inline int neigh_event_send_probe(struct neighbour *neigh, + struct sk_buff *skb, + const bool immediate_ok) { unsigned long now = jiffies; - + if (READ_ONCE(neigh->used) != now) WRITE_ONCE(neigh->used, 
now); - if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) - return __neigh_event_send(neigh, skb); + if (!(neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))) + return __neigh_event_send(neigh, skb, immediate_ok); return 0; } +static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +{ + return neigh_event_send_probe(neigh, skb, true); +} + #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb) { diff --git a/include/net/route.h b/include/net/route.h index 4c858dcf1aa8cd1988746e55eb698ad4425fd77b..25404fc2b48374c69081b8c72c2ea1dbbc09ed7f 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -370,7 +370,7 @@ static inline struct neighbour *ip_neigh_gw4(struct net_device *dev, { struct neighbour *neigh; - neigh = __ipv4_neigh_lookup_noref(dev, daddr); + neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &daddr, dev, false); diff --git a/include/net/tcp.h b/include/net/tcp.h index 44e442bf23f9ccc0a1a914345c3faf1fc9f99d5f..b9fc978fb2cad600abee7ba9667f91769a390063 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1369,6 +1369,7 @@ static inline bool tcp_checksum_complete(struct sk_buff *skb) bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); +#ifdef CONFIG_INET void __sk_defer_free_flush(struct sock *sk); static inline void sk_defer_free_flush(struct sock *sk) @@ -1377,6 +1378,9 @@ static inline void sk_defer_free_flush(struct sock *sk) return; __sk_defer_free_flush(sk); } +#else +static inline void sk_defer_free_flush(struct sock *sk) {} +#endif int tcp_filter(struct sock *sk, struct sk_buff *skb); void tcp_set_state(struct sock *sk, int state); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index ab7557d84f75136bad4f2cf4bb4ef24a612829b4..647c53b2610514eb5f6c8fa34e8df2261895df8b 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -415,9 +415,8 @@ extern int scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, int retries, struct scsi_mode_data *data, struct scsi_sense_hdr *); extern int scsi_mode_select(struct scsi_device *sdev, int pf, int sp, - int modepage, unsigned char *buffer, int len, - int timeout, int retries, - struct scsi_mode_data *data, + unsigned char *buffer, int len, int timeout, + int retries, struct scsi_mode_data *data, struct scsi_sense_hdr *); extern int scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries, struct scsi_sense_hdr *sshdr); diff --git a/include/scsi/sg.h b/include/scsi/sg.h index 843cefb8efce0e60f7ad1a2e855a51a1ab3c0f2b..068e35d365575fc56df3924acd7f444c4b624580 100644 --- a/include/scsi/sg.h +++ b/include/scsi/sg.h @@ -29,10 +29,6 @@ * For utility and test programs see: http://sg.danny.cz/sg/sg3_utils.html */ -#ifdef __KERNEL__ -extern int sg_big_buff; /* for sysctl */ -#endif - typedef struct sg_iovec /* same structure as used by readv() Linux system */ { /* call. It defines one scatter-gather element. 
*/ diff --git a/include/sound/pcm.h b/include/sound/pcm.h index 9b187d86e1bd65cc1befa0a81f48b65767c4433a..36da42cd0774854fd77ccdb53626c07e4a32152f 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -617,6 +617,7 @@ void snd_pcm_stream_unlock(struct snd_pcm_substream *substream); void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream); void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream); unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream); +unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *substream); /** * snd_pcm_stream_lock_irqsave - Lock the PCM stream @@ -635,6 +636,20 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream); void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream, unsigned long flags); +/** + * snd_pcm_stream_lock_irqsave_nested - Single-nested PCM stream locking + * @substream: PCM substream + * @flags: irq flags + * + * This locks the PCM stream like snd_pcm_stream_lock_irqsave() but with + * the single-depth lockdep subclass. + */ +#define snd_pcm_stream_lock_irqsave_nested(substream, flags) \ + do { \ + typecheck(unsigned long, flags); \ + flags = _snd_pcm_stream_lock_irqsave_nested(substream); \ + } while (0) + /** * snd_pcm_group_for_each_entry - iterate over the linked substreams * @s: the iterator diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index 1172529b5b4945367c9487f8e2c7932bf404caea..c6f5aa74db89b4d1ebb1210b235f56198f096578 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -233,25 +233,48 @@ TRACE_EVENT(cachefiles_ref, TRACE_EVENT(cachefiles_lookup, TP_PROTO(struct cachefiles_object *obj, + struct dentry *dir, struct dentry *de), - TP_ARGS(obj, de), + TP_ARGS(obj, dir, de), TP_STRUCT__entry( __field(unsigned int, obj ) __field(short, error ) + __field(unsigned long, dino ) __field(unsigned long, ino ) ), TP_fast_assign( - __entry->obj = obj->debug_id; + __entry->obj = obj ? obj->debug_id : 0; + __entry->dino = d_backing_inode(dir)->i_ino; __entry->ino = (!IS_ERR(de) && d_backing_inode(de) ? d_backing_inode(de)->i_ino : 0); __entry->error = IS_ERR(de) ? 
PTR_ERR(de) : 0; ), - TP_printk("o=%08x i=%lx e=%d", - __entry->obj, __entry->ino, __entry->error) + TP_printk("o=%08x dB=%lx B=%lx e=%d", + __entry->obj, __entry->dino, __entry->ino, __entry->error) + ); + +TRACE_EVENT(cachefiles_mkdir, + TP_PROTO(struct dentry *dir, struct dentry *subdir), + + TP_ARGS(dir, subdir), + + TP_STRUCT__entry( + __field(unsigned int, dir ) + __field(unsigned int, subdir ) + ), + + TP_fast_assign( + __entry->dir = d_backing_inode(dir)->i_ino; + __entry->subdir = d_backing_inode(subdir)->i_ino; + ), + + TP_printk("dB=%x sB=%x", + __entry->dir, + __entry->subdir) ); TRACE_EVENT(cachefiles_tmpfile, @@ -269,7 +292,7 @@ TRACE_EVENT(cachefiles_tmpfile, __entry->backer = backer->i_ino; ), - TP_printk("o=%08x b=%08x", + TP_printk("o=%08x B=%x", __entry->obj, __entry->backer) ); @@ -289,61 +312,58 @@ TRACE_EVENT(cachefiles_link, __entry->backer = backer->i_ino; ), - TP_printk("o=%08x b=%08x", + TP_printk("o=%08x B=%x", __entry->obj, __entry->backer) ); TRACE_EVENT(cachefiles_unlink, TP_PROTO(struct cachefiles_object *obj, - struct dentry *de, + ino_t ino, enum fscache_why_object_killed why), - TP_ARGS(obj, de, why), + TP_ARGS(obj, ino, why), /* Note that obj may be NULL */ TP_STRUCT__entry( __field(unsigned int, obj ) - __field(struct dentry *, de ) + __field(unsigned int, ino ) __field(enum fscache_why_object_killed, why ) ), TP_fast_assign( __entry->obj = obj ? obj->debug_id : UINT_MAX; - __entry->de = de; + __entry->ino = ino; __entry->why = why; ), - TP_printk("o=%08x d=%p w=%s", - __entry->obj, __entry->de, + TP_printk("o=%08x B=%x w=%s", + __entry->obj, __entry->ino, __print_symbolic(__entry->why, cachefiles_obj_kill_traces)) ); TRACE_EVENT(cachefiles_rename, TP_PROTO(struct cachefiles_object *obj, - struct dentry *de, - struct dentry *to, + ino_t ino, enum fscache_why_object_killed why), - TP_ARGS(obj, de, to, why), + TP_ARGS(obj, ino, why), /* Note that obj may be NULL */ TP_STRUCT__entry( __field(unsigned int, obj ) - __field(struct dentry *, de ) - __field(struct dentry *, to ) + __field(unsigned int, ino ) __field(enum fscache_why_object_killed, why ) ), TP_fast_assign( __entry->obj = obj ? 
obj->debug_id : UINT_MAX; - __entry->de = de; - __entry->to = to; + __entry->ino = ino; __entry->why = why; ), - TP_printk("o=%08x d=%p t=%p w=%s", - __entry->obj, __entry->de, __entry->to, + TP_printk("o=%08x B=%x w=%s", + __entry->obj, __entry->ino, __print_symbolic(__entry->why, cachefiles_obj_kill_traces)) ); @@ -370,7 +390,7 @@ TRACE_EVENT(cachefiles_coherency, __entry->ino = ino; ), - TP_printk("o=%08x %s i=%llx c=%u", + TP_printk("o=%08x %s B=%llx c=%u", __entry->obj, __print_symbolic(__entry->why, cachefiles_coherency_traces), __entry->ino, @@ -397,7 +417,7 @@ TRACE_EVENT(cachefiles_vol_coherency, __entry->ino = ino; ), - TP_printk("V=%08x %s i=%llx", + TP_printk("V=%08x %s B=%llx", __entry->vol, __print_symbolic(__entry->why, cachefiles_coherency_traces), __entry->ino) @@ -435,7 +455,7 @@ TRACE_EVENT(cachefiles_prep_read, __entry->cache_inode = cache_inode; ), - TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x b=%x", + TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x B=%x", __entry->rreq, __entry->index, __print_symbolic(__entry->source, netfs_sreq_sources), __print_symbolic(__entry->why, cachefiles_prepare_read_traces), @@ -466,7 +486,7 @@ TRACE_EVENT(cachefiles_read, __entry->len = len; ), - TP_printk("o=%08x b=%08x s=%llx l=%zx", + TP_printk("o=%08x B=%x s=%llx l=%zx", __entry->obj, __entry->backer, __entry->start, @@ -495,7 +515,7 @@ TRACE_EVENT(cachefiles_write, __entry->len = len; ), - TP_printk("o=%08x b=%08x s=%llx l=%zx", + TP_printk("o=%08x B=%x s=%llx l=%zx", __entry->obj, __entry->backer, __entry->start, @@ -524,7 +544,7 @@ TRACE_EVENT(cachefiles_trunc, __entry->why = why; ), - TP_printk("o=%08x b=%08x %s l=%llx->%llx", + TP_printk("o=%08x B=%x %s l=%llx->%llx", __entry->obj, __entry->backer, __print_symbolic(__entry->why, cachefiles_trunc_traces), @@ -549,7 +569,28 @@ TRACE_EVENT(cachefiles_mark_active, __entry->inode = inode->i_ino; ), - TP_printk("o=%08x i=%lx", + TP_printk("o=%08x B=%lx", + __entry->obj, __entry->inode) + ); + +TRACE_EVENT(cachefiles_mark_failed, + TP_PROTO(struct cachefiles_object *obj, + struct inode *inode), + + TP_ARGS(obj, inode), + + /* Note that obj may be NULL */ + TP_STRUCT__entry( + __field(unsigned int, obj ) + __field(ino_t, inode ) + ), + + TP_fast_assign( + __entry->obj = obj ? 
obj->debug_id : 0; + __entry->inode = inode->i_ino; + ), + + TP_printk("o=%08x B=%lx", __entry->obj, __entry->inode) ); @@ -570,7 +611,7 @@ TRACE_EVENT(cachefiles_mark_inactive, __entry->inode = inode->i_ino; ), - TP_printk("o=%08x i=%lx", + TP_printk("o=%08x B=%lx", __entry->obj, __entry->inode) ); @@ -594,7 +635,7 @@ TRACE_EVENT(cachefiles_vfs_error, __entry->where = where; ), - TP_printk("o=%08x b=%08x %s e=%d", + TP_printk("o=%08x B=%x %s e=%d", __entry->obj, __entry->backer, __print_symbolic(__entry->where, cachefiles_error_traces), @@ -621,7 +662,7 @@ TRACE_EVENT(cachefiles_io_error, __entry->where = where; ), - TP_printk("o=%08x b=%08x %s e=%d", + TP_printk("o=%08x B=%x %s e=%d", __entry->obj, __entry->backer, __print_symbolic(__entry->where, cachefiles_error_traces), diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 3e042ca2cedb506b32c31d26287c447386f1e085..a8a64b97504d8a5d0a743a58c4cd5cb2625f0a83 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -14,7 +14,7 @@ EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \ EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ - EM(SKB_DROP_REASON_TCP_FILTER, TCP_FILTER) \ + EM(SKB_DROP_REASON_SOCKET_FILTER, SOCKET_FILTER) \ EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ EMe(SKB_DROP_REASON_MAX, MAX) diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 1e566ac4b8123953380a4ea8788b179c01b1d6f7..29982d60b68ab271729d17190d175559a50eeb85 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -794,6 +794,9 @@ RPC_SHOW_SOCKET RPC_SHOW_SOCK + +#include + /* * Now redefine the EM() and EMe() macros to map the enums to the strings * that will be printed in the output. @@ -816,27 +819,32 @@ DECLARE_EVENT_CLASS(xs_socket_event, __field(unsigned int, socket_state) __field(unsigned int, sock_state) __field(unsigned long long, ino) - __string(dstaddr, - xprt->address_strings[RPC_DISPLAY_ADDR]) - __string(dstport, - xprt->address_strings[RPC_DISPLAY_PORT]) + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( struct inode *inode = SOCK_INODE(socket); + const struct sock *sk = socket->sk; + const struct inet_sock *inet = inet_sk(sk); + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + + TP_STORE_ADDR_PORTS(__entry, inet, sk); + __entry->socket_state = socket->state; __entry->sock_state = socket->sk->sk_state; __entry->ino = (unsigned long long)inode->i_ino; - __assign_str(dstaddr, - xprt->address_strings[RPC_DISPLAY_ADDR]); - __assign_str(dstport, - xprt->address_strings[RPC_DISPLAY_PORT]); + ), TP_printk( - "socket:[%llu] dstaddr=%s/%s " + "socket:[%llu] srcaddr=%pISpc dstaddr=%pISpc " "state=%u (%s) sk_state=%u (%s)", - __entry->ino, __get_str(dstaddr), __get_str(dstport), + __entry->ino, + __entry->saddr, + __entry->daddr, __entry->socket_state, rpc_show_socket_state(__entry->socket_state), __entry->sock_state, @@ -866,29 +874,33 @@ DECLARE_EVENT_CLASS(xs_socket_event_done, __field(unsigned int, socket_state) __field(unsigned int, sock_state) __field(unsigned long long, ino) - __string(dstaddr, - xprt->address_strings[RPC_DISPLAY_ADDR]) - __string(dstport, - xprt->address_strings[RPC_DISPLAY_PORT]) + __array(__u8, saddr, sizeof(struct sockaddr_in6)) + __array(__u8, daddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( struct inode *inode = SOCK_INODE(socket); + const struct sock *sk = 
socket->sk; + const struct inet_sock *inet = inet_sk(sk); + + memset(__entry->saddr, 0, sizeof(struct sockaddr_in6)); + memset(__entry->daddr, 0, sizeof(struct sockaddr_in6)); + + TP_STORE_ADDR_PORTS(__entry, inet, sk); + __entry->socket_state = socket->state; __entry->sock_state = socket->sk->sk_state; __entry->ino = (unsigned long long)inode->i_ino; __entry->error = error; - __assign_str(dstaddr, - xprt->address_strings[RPC_DISPLAY_ADDR]); - __assign_str(dstport, - xprt->address_strings[RPC_DISPLAY_PORT]); ), TP_printk( - "error=%d socket:[%llu] dstaddr=%s/%s " + "error=%d socket:[%llu] srcaddr=%pISpc dstaddr=%pISpc " "state=%u (%s) sk_state=%u (%s)", __entry->error, - __entry->ino, __get_str(dstaddr), __get_str(dstport), + __entry->ino, + __entry->saddr, + __entry->daddr, __entry->socket_state, rpc_show_socket_state(__entry->socket_state), __entry->sock_state, @@ -953,7 +965,8 @@ TRACE_EVENT(rpc_socket_nospace, { BIT(XPRT_REMOVE), "REMOVE" }, \ { BIT(XPRT_CONGESTED), "CONGESTED" }, \ { BIT(XPRT_CWND_WAIT), "CWND_WAIT" }, \ - { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" }) + { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" }, \ + { BIT(XPRT_SND_IS_COOKIE), "SND_IS_COOKIE" }) DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class, TP_PROTO( @@ -1150,8 +1163,11 @@ DECLARE_EVENT_CLASS(xprt_writelock_event, __entry->task_id = -1; __entry->client_id = -1; } - __entry->snd_task_id = xprt->snd_task ? - xprt->snd_task->tk_pid : -1; + if (xprt->snd_task && + !test_bit(XPRT_SND_IS_COOKIE, &xprt->state)) + __entry->snd_task_id = xprt->snd_task->tk_pid; + else + __entry->snd_task_id = -1; ), TP_printk(SUNRPC_TRACE_TASK_SPECIFIER @@ -1196,8 +1212,12 @@ DECLARE_EVENT_CLASS(xprt_cong_event, __entry->task_id = -1; __entry->client_id = -1; } - __entry->snd_task_id = xprt->snd_task ? - xprt->snd_task->tk_pid : -1; + if (xprt->snd_task && + !test_bit(XPRT_SND_IS_COOKIE, &xprt->state)) + __entry->snd_task_id = xprt->snd_task->tk_pid; + else + __entry->snd_task_id = -1; + __entry->cong = xprt->cong; __entry->cwnd = xprt->cwnd; __entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state); diff --git a/include/trace/perf.h b/include/trace/perf.h index ea4405de175a3eb6892a29896bee45800c5936a2..5d48c46a300835c584e8e374f41a3b79f01b00f2 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -23,8 +23,9 @@ #undef __get_rel_dynamic_array #define __get_rel_dynamic_array(field) \ - ((void *)(&__entry->__rel_loc_##field) + \ - sizeof(__entry->__rel_loc_##field) + \ + ((void *)__entry + \ + offsetof(typeof(*__entry), __rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ (__entry->__rel_loc_##field & 0xffff)) #undef __get_rel_dynamic_array_len diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h index 8c6f7c43351857905a3dc006a61a0e4b88c352a3..3d29919045af2e1b7eed306035dad4dcbf7b5286 100644 --- a/include/trace/trace_events.h +++ b/include/trace/trace_events.h @@ -128,7 +128,7 @@ TRACE_MAKE_SYSTEM_STR(); struct trace_event_raw_##name { \ struct trace_entry ent; \ tstruct \ - char __data[0]; \ + char __data[]; \ }; \ \ static struct trace_event_class event_class_##name; @@ -318,9 +318,10 @@ TRACE_MAKE_SYSTEM_STR(); #define __get_str(field) ((char *)__get_dynamic_array(field)) #undef __get_rel_dynamic_array -#define __get_rel_dynamic_array(field) \ - ((void *)(&__entry->__rel_loc_##field) + \ - sizeof(__entry->__rel_loc_##field) + \ +#define __get_rel_dynamic_array(field) \ + ((void *)__entry + \ + offsetof(typeof(*__entry), __rel_loc_##field) + \ + sizeof(__entry->__rel_loc_##field) + \ (__entry->__rel_loc_##field & 
0xffff)) #undef __get_rel_dynamic_array_len diff --git a/include/uapi/linux/cyclades.h b/include/uapi/linux/cyclades.h new file mode 100644 index 0000000000000000000000000000000000000000..6225c5aebe06a8ed9c3c779e24d9bc141563722a --- /dev/null +++ b/include/uapi/linux/cyclades.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_CYCLADES_H +#define _UAPI_LINUX_CYCLADES_H + +#warning "Support for features provided by this header has been removed" +#warning "Please consider updating your code" + +struct cyclades_monitor { + unsigned long int_count; + unsigned long char_count; + unsigned long char_max; + unsigned long char_last; +}; + +#define CYGETMON 0x435901 +#define CYGETTHRESH 0x435902 +#define CYSETTHRESH 0x435903 +#define CYGETDEFTHRESH 0x435904 +#define CYSETDEFTHRESH 0x435905 +#define CYGETTIMEOUT 0x435906 +#define CYSETTIMEOUT 0x435907 +#define CYGETDEFTIMEOUT 0x435908 +#define CYSETDEFTIMEOUT 0x435909 +#define CYSETRFLOW 0x43590a +#define CYGETRFLOW 0x43590b +#define CYSETRTSDTR_INV 0x43590c +#define CYGETRTSDTR_INV 0x43590d +#define CYZSETPOLLCYCLE 0x43590e +#define CYZGETPOLLCYCLE 0x43590f +#define CYGETCD1400VER 0x435910 +#define CYSETWAIT 0x435912 +#define CYGETWAIT 0x435913 + +#endif /* _UAPI_LINUX_CYCLADES_H */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 9563d294f181974a1032ae5588f754af1b9c6f53..5191b57e156220bd7fc6d85940c301e16cc1645a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 #define KVM_CAP_VM_GPA_BITS 207 #define KVM_CAP_XSAVE2 208 +#define KVM_CAP_SYS_ATTRIBUTES 209 #ifdef KVM_CAP_IRQ_ROUTING @@ -1623,9 +1624,6 @@ struct kvm_enc_region { #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) -/* Available with KVM_CAP_XSAVE2 */ -#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) - struct kvm_s390_pv_sec_parm { __u64 origin; __u64 length; @@ -2047,4 +2045,7 @@ struct kvm_stats_desc { #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) +/* Available with KVM_CAP_XSAVE2 */ +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + #endif /* __LINUX_KVM_H */ diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 4b3395082d15c95acc2afaeb7f15b32baf65ef53..26071021e986f6e3f6f71cf12fba8bd11a2390bd 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -106,7 +106,7 @@ enum ip_conntrack_status { IPS_NAT_CLASH = IPS_UNTRACKED, #endif - /* Conntrack got a helper explicitly attached via CT target. */ + /* Conntrack got a helper explicitly attached (ruleset, ctnetlink). */ IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 1b65042ab1db8df48b3daa7940756e1d0e132abe..82858b697c05a1d27b6db110a9da3a24bce6ca0b 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -465,6 +465,8 @@ struct perf_event_attr { /* * User provided data if sigtrap=1, passed back to user via * siginfo_t::si_perf_data, e.g. to permit user to identify the event. + * Note, siginfo_t::si_perf_data is long-sized, and sig_data will be + * truncated accordingly on 32 bit architectures. 
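+ *
+ * For example, on a 32-bit architecture where siginfo_t::si_perf_data
+ * is a 32-bit long, a sig_data of 0x1122334455667788 is delivered to
+ * the signal handler as si_perf_data == 0x55667788 (the low 32 bits).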
*/ __u64 sig_data; }; diff --git a/include/uapi/linux/smc_diag.h b/include/uapi/linux/smc_diag.h index c7008d87f1a4c77d80f4d40ac3c447e760f7fbdf..8cb3a6fef55301e8432ea99421b60b7d96ecad45 100644 --- a/include/uapi/linux/smc_diag.h +++ b/include/uapi/linux/smc_diag.h @@ -84,12 +84,11 @@ struct smc_diag_conninfo { /* SMC_DIAG_LINKINFO */ struct smc_diag_linkinfo { - __u8 link_id; /* link identifier */ - __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ - __u8 ibport; /* RDMA device port number */ - __u8 gid[40]; /* local GID */ - __u8 peer_gid[40]; /* peer GID */ - __aligned_u64 net_cookie; /* RDMA device net namespace */ + __u8 link_id; /* link identifier */ + __u8 ibname[IB_DEVICE_NAME_MAX]; /* name of the RDMA device */ + __u8 ibport; /* RDMA device port number */ + __u8 gid[40]; /* local GID */ + __u8 peer_gid[40]; /* peer GID */ }; struct smc_diag_lgrinfo { diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h index ef0cafe295b282539a552c87df0f63207bd2dd86..2d3e5df39a59eeec5edaed0754bf2dfe798febfe 100644 --- a/include/uapi/sound/asound.h +++ b/include/uapi/sound/asound.h @@ -56,8 +56,10 @@ * * ****************************************************************************/ +#define AES_IEC958_STATUS_SIZE 24 + struct snd_aes_iec958 { - unsigned char status[24]; /* AES/IEC958 channel status bits */ + unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */ unsigned char subcode[147]; /* AES/IEC958 subcode bits */ unsigned char pad; /* nothing */ unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */ diff --git a/include/uapi/xen/gntdev.h b/include/uapi/xen/gntdev.h index 9ac5515b9bc220ad3090e70e2f074b73354eac44..7a7145395c09c8767ef042853b877f4b9a478e04 100644 --- a/include/uapi/xen/gntdev.h +++ b/include/uapi/xen/gntdev.h @@ -47,7 +47,13 @@ struct ioctl_gntdev_grant_ref { /* * Inserts the grant references into the mapping table of an instance * of gntdev. N.B. This does not perform the mapping, which is deferred - * until mmap() is called with @index as the offset. + * until mmap() is called with @index as the offset. @index should be + * considered opaque to userspace, with one exception: if no grant + * references have ever been inserted into the mapping table of this + * instance, @index will be set to 0. This is necessary to use gntdev + * with userspace APIs that expect a file descriptor that can be + * mmap()'d at offset 0, such as Wayland. If @count is set to 0, this + * ioctl will fail. */ #define IOCTL_GNTDEV_MAP_GRANT_REF \ _IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref)) diff --git a/include/xen/xenbus_dev.h b/include/xen/xenbus_dev.h index bbee8c6a349dec65d57c0f683fcb4d6c621111b5..4dc45a51c044f85a21e7d21dd74229eb01463a7c 100644 --- a/include/xen/xenbus_dev.h +++ b/include/xen/xenbus_dev.h @@ -1,6 +1,4 @@ /****************************************************************************** - * evtchn.h - * * Interface to /dev/xen/xenbus_backend. 
* * Copyright (c) 2011 Bastian Blank diff --git a/init/main.c b/init/main.c index bb984ed79de0ea49b9a298c209c0c2f7e6f70941..65fa2e41a9c0904131525d504f3ec86add44f141 100644 --- a/init/main.c +++ b/init/main.c @@ -834,12 +834,15 @@ static void __init mm_init(void) init_mem_debugging_and_hardening(); kfence_alloc_pool(); report_meminit(); - stack_depot_init(); + stack_depot_early_init(); mem_init(); mem_init_print_info(); - /* page_owner must be initialized after buddy is ready */ - page_ext_init_flatmem_late(); kmem_cache_init(); + /* + * page_owner must be initialized after buddy is ready, and also after + * slab is ready so that stack_depot_init() works properly + */ + page_ext_init_flatmem_late(); kmemleak_init(); pgtable_init(); debug_objects_mem_init(); diff --git a/ipc/sem.c b/ipc/sem.c index 6693daf4fe1124b3a69a345c42f30cf9767a739b..0dbdb98fdf2d940c33400fb82e353c1d5896cfa9 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1964,6 +1964,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) */ un = lookup_undo(ulp, semid); if (un) { + spin_unlock(&ulp->lock); kvfree(new); goto success; } @@ -1976,9 +1977,8 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) ipc_assert_locked_object(&sma->sem_perm); list_add(&new->list_id, &sma->list_id); un = new; - -success: spin_unlock(&ulp->lock); +success: sem_unlock(sma, -1); out: return un; diff --git a/ipc/util.c b/ipc/util.c index fa2d86ef3fb80b1a828043190a69f8c935715354..a2208d0f26b2df52605ad610758b7b8b6f64a46f 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -894,7 +894,7 @@ static int sysvipc_proc_open(struct inode *inode, struct file *file) if (!iter) return -ENOMEM; - iter->iface = PDE_DATA(inode); + iter->iface = pde_data(inode); iter->ns = get_ipc_ns(current->nsproxy->ipc_ns); iter->pid_ns = get_pid_ns(task_active_pid_ns(current)); diff --git a/kernel/async.c b/kernel/async.c index b8d7a663497f9e9ea65507469a25fd5f252330bc..b2c4ba5686ee493ac3d6e8391eda8bf3f6e8de81 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -205,9 +205,6 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data, atomic_inc(&entry_count); spin_unlock_irqrestore(&async_lock, flags); - /* mark that this task has queued an async job, used by module init */ - current->flags |= PF_USED_ASYNC; - /* schedule for execution */ queue_work_node(node, system_unbound_wq, &entry->work); diff --git a/kernel/audit.c b/kernel/audit.c index e4bbe2c70c26bf77ceb612212469ed7d90aa88bc..7690c29d4ee44e009d7a547aab23836c3ca918b2 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -541,20 +541,22 @@ static void kauditd_printk_skb(struct sk_buff *skb) /** * kauditd_rehold_skb - Handle a audit record send failure in the hold queue * @skb: audit record + * @error: error code (unused) * * Description: * This should only be used by the kauditd_thread when it fails to flush the * hold queue. */ -static void kauditd_rehold_skb(struct sk_buff *skb) +static void kauditd_rehold_skb(struct sk_buff *skb, __always_unused int error) { - /* put the record back in the queue at the same place */ - skb_queue_head(&audit_hold_queue, skb); + /* put the record back in the queue */ + skb_queue_tail(&audit_hold_queue, skb); } /** * kauditd_hold_skb - Queue an audit record, waiting for auditd * @skb: audit record + * @error: error code * * Description: * Queue the audit record, waiting for an instance of auditd. When this @@ -564,19 +566,31 @@ static void kauditd_rehold_skb(struct sk_buff *skb) * and queue it, if we have room. 
If we want to hold on to the record, but we * don't have room, record a record lost message. */ -static void kauditd_hold_skb(struct sk_buff *skb) +static void kauditd_hold_skb(struct sk_buff *skb, int error) { /* at this point it is uncertain if we will ever send this to auditd so * try to send the message via printk before we go any further */ kauditd_printk_skb(skb); /* can we just silently drop the message? */ - if (!audit_default) { - kfree_skb(skb); - return; + if (!audit_default) + goto drop; + + /* the hold queue is only for when the daemon goes away completely, + * not -EAGAIN failures; if we are in a -EAGAIN state requeue the + * record on the retry queue unless it's full, in which case drop it + */ + if (error == -EAGAIN) { + if (!audit_backlog_limit || + skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { + skb_queue_tail(&audit_retry_queue, skb); + return; + } + audit_log_lost("kauditd retry queue overflow"); + goto drop; } - /* if we have room, queue the message */ + /* if we have room in the hold queue, queue the message */ if (!audit_backlog_limit || skb_queue_len(&audit_hold_queue) < audit_backlog_limit) { skb_queue_tail(&audit_hold_queue, skb); @@ -585,24 +599,32 @@ static void kauditd_hold_skb(struct sk_buff *skb) /* we have no other options - drop the message */ audit_log_lost("kauditd hold queue overflow"); +drop: kfree_skb(skb); } /** * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd * @skb: audit record + * @error: error code (unused) * * Description: * Not as serious as kauditd_hold_skb() as we still have a connected auditd, * but for some reason we are having problems sending it audit records so * queue the given record and attempt to resend. */ -static void kauditd_retry_skb(struct sk_buff *skb) +static void kauditd_retry_skb(struct sk_buff *skb, __always_unused int error) { - /* NOTE: because records should only live in the retry queue for a - * short period of time, before either being sent or moved to the hold - * queue, we don't currently enforce a limit on this queue */ - skb_queue_tail(&audit_retry_queue, skb); + if (!audit_backlog_limit || + skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { + skb_queue_tail(&audit_retry_queue, skb); + return; + } + + /* we have to drop the record, send it via printk as a last effort */ + kauditd_printk_skb(skb); + audit_log_lost("kauditd retry queue overflow"); + kfree_skb(skb); } /** @@ -640,7 +662,7 @@ static void auditd_reset(const struct auditd_connection *ac) /* flush the retry queue to the hold queue, but don't touch the main * queue since we need to process that normally for multicast */ while ((skb = skb_dequeue(&audit_retry_queue))) - kauditd_hold_skb(skb); + kauditd_hold_skb(skb, -ECONNREFUSED); } /** @@ -714,16 +736,18 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, struct sk_buff_head *queue, unsigned int retry_limit, void (*skb_hook)(struct sk_buff *skb), - void (*err_hook)(struct sk_buff *skb)) + void (*err_hook)(struct sk_buff *skb, int error)) { int rc = 0; - struct sk_buff *skb; + struct sk_buff *skb = NULL; + struct sk_buff *skb_tail; unsigned int failed = 0; /* NOTE: kauditd_thread takes care of all our locking, we just use * the netlink info passed to us (e.g. 
sk and portid) */ - while ((skb = skb_dequeue(queue))) { + skb_tail = skb_peek_tail(queue); + while ((skb != skb_tail) && (skb = skb_dequeue(queue))) { /* call the skb_hook for each skb we touch */ if (skb_hook) (*skb_hook)(skb); @@ -731,7 +755,7 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, /* can we send to anyone via unicast? */ if (!sk) { if (err_hook) - (*err_hook)(skb); + (*err_hook)(skb, -ECONNREFUSED); continue; } @@ -745,7 +769,7 @@ retry: rc == -ECONNREFUSED || rc == -EPERM) { sk = NULL; if (err_hook) - (*err_hook)(skb); + (*err_hook)(skb, rc); if (rc == -EAGAIN) rc = 0; /* continue to drain the queue */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index fce5d43a933f0000df654edd3a6d95481dd319e8..a83928cbdcb7cd2e52d3bf40db5e794db39499a7 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -185,7 +185,7 @@ static int audit_match_perm(struct audit_context *ctx, int mask) case AUDITSC_EXECVE: return mask & AUDIT_PERM_EXEC; case AUDITSC_OPENAT2: - return mask & ACC_MODE((u32)((struct open_how *)ctx->argv[2])->flags); + return mask & ACC_MODE((u32)ctx->openat2.flags); default: return 0; } diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 06062370c3b81e4c7e9653a2bae832f36622b1f2..9e4ecc99064775bfd3ddf9b98963f2f54ff94485 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -207,7 +207,7 @@ BTF_ID(func, bpf_lsm_socket_socketpair) BTF_ID(func, bpf_lsm_syslog) BTF_ID(func, bpf_lsm_task_alloc) -BTF_ID(func, bpf_lsm_task_getsecid_subj) +BTF_ID(func, bpf_lsm_current_getsecid_subj) BTF_ID(func, bpf_lsm_task_getsecid_obj) BTF_ID(func, bpf_lsm_task_prctl) BTF_ID(func, bpf_lsm_task_setscheduler) diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index 638d7fd7b375456b4ce14b2caf743aee2bc28acd..710ba9de12ce41426f34d29656d746fd4df8de34 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -104,7 +104,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node) } rb = vmap(pages, nr_meta_pages + 2 * nr_data_pages, - VM_ALLOC | VM_USERMAP, PAGE_KERNEL); + VM_MAP | VM_USERMAP, PAGE_KERNEL); if (rb) { kmemleak_not_leak(pages); rb->pages = pages; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 49e567209c6b6f961e6d10a27db90440213b090f..22c8ae94e4c1cba7549c2e08d5da4b8f0ba564dc 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -472,13 +472,14 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf, u32, size, u64, flags) { struct pt_regs *regs; - long res; + long res = -EINVAL; if (!try_get_task_stack(task)) return -EFAULT; regs = task_pt_regs(task); - res = __bpf_get_stack(regs, task, NULL, buf, size, flags); + if (regs) + res = __bpf_get_stack(regs, task, NULL, buf, size, flags); put_task_stack(task); return res; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 4b6974a195c138a3a69611408473c1c6d0d8f99f..5e7edf913060129bad0271924bb40f7777a137d2 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -550,11 +550,12 @@ static __always_inline u64 notrace bpf_prog_start_time(void) static void notrace inc_misses_counter(struct bpf_prog *prog) { struct bpf_prog_stats *stats; + unsigned int flags; stats = this_cpu_ptr(prog->stats); - u64_stats_update_begin(&stats->syncp); + flags = u64_stats_update_begin_irqsave(&stats->syncp); u64_stats_inc(&stats->misses); - u64_stats_update_end(&stats->syncp); + u64_stats_update_end_irqrestore(&stats->syncp, flags); } /* The logic is similar to bpf_prog_run(), but with an explicit diff --git 
a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 41e0837a5a0bda17b361b87f4a4faf4790eb0b8a..0e877dbcfeea9c337e9af64c9264bc888f5b9968 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -549,6 +549,14 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); + /* + * Release agent gets called with all capabilities, + * require capabilities to set release agent. + */ + if ((of->file->f_cred->user_ns != &init_user_ns) || + !capable(CAP_SYS_ADMIN)) + return -EPERM; + cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) return -ENODEV; @@ -954,6 +962,12 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) /* Specifying two release agents is forbidden */ if (ctx->release_agent) return invalfc(fc, "release_agent respecified"); + /* + * Release agent gets called with all capabilities, + * require capabilities to set release agent. + */ + if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN)) + return invalfc(fc, "Setting release_agent not allowed"); ctx->release_agent = param->string; param->string = NULL; break; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index b31e1465868ab542c91a692063eb845ed2de1e05..9d05c3ca2d5e3cb9cfaebe123a885e4ddd6fd522 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3643,6 +3643,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, cgroup_get(cgrp); cgroup_kn_unlock(of->kn); + /* Allow only one trigger per file descriptor */ + if (ctx->psi.trigger) { + cgroup_put(cgrp); + return -EBUSY; + } + psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; new = psi_trigger_create(psi, buf, nbytes, res); if (IS_ERR(new)) { @@ -3650,8 +3656,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, return PTR_ERR(new); } - psi_trigger_replace(&ctx->psi.trigger, new); - + smp_store_release(&ctx->psi.trigger, new); cgroup_put(cgrp); return nbytes; @@ -3690,7 +3695,7 @@ static void cgroup_pressure_release(struct kernfs_open_file *of) { struct cgroup_file_ctx *ctx = of->priv; - psi_trigger_replace(&ctx->psi.trigger, NULL); + psi_trigger_destroy(ctx->psi.trigger); } bool cgroup_psi_enabled(void) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index dc653ab26e50e0a42632490172c225a8b9c3fcd5..4c7254e8f49a546fb1e9d7ccb178005c7b5d3f79 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -590,6 +590,35 @@ static inline void free_cpuset(struct cpuset *cs) kfree(cs); } +/* + * validate_change_legacy() - Validate conditions specific to legacy (v1) + * behavior. + */ +static int validate_change_legacy(struct cpuset *cur, struct cpuset *trial) +{ + struct cgroup_subsys_state *css; + struct cpuset *c, *par; + int ret; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + /* Each of our child cpusets must be a subset of us */ + ret = -EBUSY; + cpuset_for_each_child(c, css, cur) + if (!is_cpuset_subset(c, trial)) + goto out; + + /* On legacy hierarchy, we must be a subset of our parent cpuset. */ + ret = -EACCES; + par = parent_cs(cur); + if (par && !is_cpuset_subset(trial, par)) + goto out; + + ret = 0; +out: + return ret; +} + /* * validate_change() - Used to validate that any proposed cpuset change * follows the structural rules for cpusets. 
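A minimal user-space sketch of the two containment rules that the validate_change_legacy() helper added above enforces, with a single 64-bit word standing in for struct cpumask; all names here are hypothetical illustration, not kernel API:

#include <stdbool.h>
#include <stdint.h>

struct toy_cpuset {
        uint64_t cpus;                  /* stand-in for cpus_allowed */
        struct toy_cpuset *parent;
        struct toy_cpuset **child;
        int nr_children;
};

/* every CPU set in @a must also be set in @b */
static bool is_subset(uint64_t a, uint64_t b)
{
        return (a & ~b) == 0;
}

/* mirrors validate_change_legacy(): children may not escape the trial
 * mask (-EBUSY), and the trial mask may not escape the parent (-EACCES) */
static int toy_validate_legacy(const struct toy_cpuset *cur, uint64_t trial)
{
        for (int i = 0; i < cur->nr_children; i++)
                if (!is_subset(cur->child[i]->cpus, trial))
                        return -16;     /* -EBUSY */
        if (cur->parent && !is_subset(trial, cur->parent->cpus))
                return -13;     /* -EACCES */
        return 0;
}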
@@ -614,20 +643,21 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial) { struct cgroup_subsys_state *css; struct cpuset *c, *par; - int ret; - - /* The checks don't apply to root cpuset */ - if (cur == &top_cpuset) - return 0; + int ret = 0; rcu_read_lock(); - par = parent_cs(cur); - /* On legacy hierarchy, we must be a subset of our parent cpuset. */ - ret = -EACCES; - if (!is_in_v2_mode() && !is_cpuset_subset(trial, par)) + if (!is_in_v2_mode()) + ret = validate_change_legacy(cur, trial); + if (ret) + goto out; + + /* Remaining checks don't apply to root cpuset */ + if (cur == &top_cpuset) goto out; + par = parent_cs(cur); + /* * If either I or some sibling (!= me) is exclusive, we can't * overlap @@ -1175,9 +1205,7 @@ enum subparts_cmd { * * Because of the implicit cpu exclusive nature of a partition root, * cpumask changes that violates the cpu exclusivity rule will not be - * permitted when checked by validate_change(). The validate_change() - * function will also prevent any changes to the cpu list if it is not - * a superset of children's cpu lists. + * permitted when checked by validate_change(). */ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, struct cpumask *newmask, @@ -1522,10 +1550,15 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, struct cpuset *sibling; struct cgroup_subsys_state *pos_css; + percpu_rwsem_assert_held(&cpuset_rwsem); + /* * Check all its siblings and call update_cpumasks_hier() * if their use_parent_ecpus flag is set in order for them * to use the right effective_cpus value. + * + * The update_cpumasks_hier() function may sleep. So we have to + * release the RCU read lock before calling it. */ rcu_read_lock(); cpuset_for_each_child(sibling, pos_css, parent) { @@ -1533,8 +1566,13 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, continue; if (!sibling->use_parent_ecpus) continue; + if (!css_tryget_online(&sibling->css)) + continue; + rcu_read_unlock(); update_cpumasks_hier(sibling, tmp); + rcu_read_lock(); + css_put(&sibling->css); } rcu_read_unlock(); } @@ -1607,8 +1645,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, * Make sure that subparts_cpus is a subset of cpus_allowed. 
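 *
 * For example: with subparts_cpus = {2,3} and cpus_allowed newly set
 * to {3,4}, the old cpumask_andnot() left subparts_cpus = {2}, which
 * is not a subset of cpus_allowed at all; cpumask_and() keeps the
 * intersection {3}, preserving the invariant.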
*/ if (cs->nr_subparts_cpus) { - cpumask_andnot(cs->subparts_cpus, cs->subparts_cpus, - cs->cpus_allowed); + cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed); cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus); } spin_unlock_irq(&callback_lock); diff --git a/kernel/events/core.c b/kernel/events/core.c index fc18664f49b08743883b4ac0758ccdc58c7e7e08..6859229497b1544594659d40ed301d41425da035 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -674,6 +674,23 @@ perf_event_set_state(struct perf_event *event, enum perf_event_state state) WRITE_ONCE(event->state, state); } +/* + * UP store-release, load-acquire + */ + +#define __store_release(ptr, val) \ +do { \ + barrier(); \ + WRITE_ONCE(*(ptr), (val)); \ +} while (0) + +#define __load_acquire(ptr) \ +({ \ + __unqual_scalar_typeof(*(ptr)) ___p = READ_ONCE(*(ptr)); \ + barrier(); \ + ___p; \ +}) + #ifdef CONFIG_CGROUP_PERF static inline bool @@ -719,34 +736,51 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event) return t->time; } -static inline void __update_cgrp_time(struct perf_cgroup *cgrp) +static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now) { - struct perf_cgroup_info *info; - u64 now; - - now = perf_clock(); + struct perf_cgroup_info *t; - info = this_cpu_ptr(cgrp->info); + t = per_cpu_ptr(event->cgrp->info, event->cpu); + if (!__load_acquire(&t->active)) + return t->time; + now += READ_ONCE(t->timeoffset); + return now; +} - info->time += now - info->timestamp; +static inline void __update_cgrp_time(struct perf_cgroup_info *info, u64 now, bool adv) +{ + if (adv) + info->time += now - info->timestamp; info->timestamp = now; + /* + * see update_context_time() + */ + WRITE_ONCE(info->timeoffset, info->time - info->timestamp); } -static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) +static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final) { struct perf_cgroup *cgrp = cpuctx->cgrp; struct cgroup_subsys_state *css; + struct perf_cgroup_info *info; if (cgrp) { + u64 now = perf_clock(); + for (css = &cgrp->css; css; css = css->parent) { cgrp = container_of(css, struct perf_cgroup, css); - __update_cgrp_time(cgrp); + info = this_cpu_ptr(cgrp->info); + + __update_cgrp_time(info, now, true); + if (final) + __store_release(&info->active, 0); } } } static inline void update_cgrp_time_from_event(struct perf_event *event) { + struct perf_cgroup_info *info; struct perf_cgroup *cgrp; /* @@ -760,8 +794,10 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) /* * Do not update time when cgroup is not active */ - if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) - __update_cgrp_time(event->cgrp); + if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) { + info = this_cpu_ptr(event->cgrp->info); + __update_cgrp_time(info, perf_clock(), true); + } } static inline void @@ -785,7 +821,8 @@ perf_cgroup_set_timestamp(struct task_struct *task, for (css = &cgrp->css; css; css = css->parent) { cgrp = container_of(css, struct perf_cgroup, css); info = this_cpu_ptr(cgrp->info); - info->timestamp = ctx->timestamp; + __update_cgrp_time(info, ctx->timestamp, false); + __store_release(&info->active, 1); } } @@ -802,7 +839,7 @@ static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list); */ static void perf_cgroup_switch(struct task_struct *task, int mode) { - struct perf_cpu_context *cpuctx; + struct perf_cpu_context *cpuctx, *tmp; struct list_head *list; unsigned long flags; 
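The update_sibling_cpumasks() change above is an instance of a common kernel pattern: a function that may sleep cannot run under rcu_read_lock(), so each iteration pins the current element with a reference, drops the RCU read lock around the sleeping call, then re-acquires both. A kernel-style sketch of the shape (do_sleeping_update() is a hypothetical stand-in for update_cpumasks_hier()):

        rcu_read_lock();
        cpuset_for_each_child(sibling, pos_css, parent) {
                if (!css_tryget_online(&sibling->css))
                        continue;               /* going away, skip it */
                rcu_read_unlock();              /* the update may sleep */

                do_sleeping_update(sibling);

                rcu_read_lock();                /* our ref kept sibling valid */
                css_put(&sibling->css);
        }
        rcu_read_unlock();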
@@ -813,7 +850,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) local_irq_save(flags); list = this_cpu_ptr(&cgrp_cpuctx_list); - list_for_each_entry(cpuctx, list, cgrp_cpuctx_entry) { + list_for_each_entry_safe(cpuctx, tmp, list, cgrp_cpuctx_entry) { WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); perf_ctx_lock(cpuctx, cpuctx->task_ctx); @@ -981,14 +1018,6 @@ out: return ret; } -static inline void -perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) -{ - struct perf_cgroup_info *t; - t = per_cpu_ptr(event->cgrp->info, event->cpu); - event->shadow_ctx_time = now - t->timestamp; -} - static inline void perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx) { @@ -1066,7 +1095,8 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) { } -static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) +static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, + bool final) { } @@ -1098,12 +1128,12 @@ perf_cgroup_switch(struct task_struct *task, struct task_struct *next) { } -static inline void -perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) +static inline u64 perf_cgroup_event_time(struct perf_event *event) { + return 0; } -static inline u64 perf_cgroup_event_time(struct perf_event *event) +static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now) { return 0; } @@ -1525,22 +1555,59 @@ static void perf_unpin_context(struct perf_event_context *ctx) /* * Update the record of the current time in a context. */ -static void update_context_time(struct perf_event_context *ctx) +static void __update_context_time(struct perf_event_context *ctx, bool adv) { u64 now = perf_clock(); - ctx->time += now - ctx->timestamp; + if (adv) + ctx->time += now - ctx->timestamp; ctx->timestamp = now; + + /* + * The above: time' = time + (now - timestamp), can be re-arranged + * into: time` = now + (time - timestamp), which gives a single value + * offset to compute future time without locks on. + * + * See perf_event_time_now(), which can be used from NMI context where + * it's (obviously) not possible to acquire ctx->lock in order to read + * both the above values in a consistent manner. + */ + WRITE_ONCE(ctx->timeoffset, ctx->time - ctx->timestamp); +} + +static void update_context_time(struct perf_event_context *ctx) +{ + __update_context_time(ctx, true); } static u64 perf_event_time(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; + if (unlikely(!ctx)) + return 0; + if (is_cgroup_event(event)) return perf_cgroup_event_time(event); - return ctx ? 
ctx->time : 0; + return ctx->time; +} + +static u64 perf_event_time_now(struct perf_event *event, u64 now) +{ + struct perf_event_context *ctx = event->ctx; + + if (unlikely(!ctx)) + return 0; + + if (is_cgroup_event(event)) + return perf_cgroup_event_time_now(event, now); + + if (!(__load_acquire(&ctx->is_active) & EVENT_TIME)) + return ctx->time; + + now += READ_ONCE(ctx->timeoffset); + return now; } static enum event_type_t get_event_type(struct perf_event *event) @@ -2350,7 +2417,7 @@ __perf_remove_from_context(struct perf_event *event, if (ctx->is_active & EVENT_TIME) { update_context_time(ctx); - update_cgrp_time_from_cpuctx(cpuctx); + update_cgrp_time_from_cpuctx(cpuctx, false); } event_sched_out(event, cpuctx, ctx); @@ -2361,6 +2428,9 @@ __perf_remove_from_context(struct perf_event *event, list_del_event(event, ctx); if (!ctx->nr_events && ctx->is_active) { + if (ctx == &cpuctx->ctx) + update_cgrp_time_from_cpuctx(cpuctx, true); + ctx->is_active = 0; ctx->rotate_necessary = 0; if (ctx->task) { @@ -2392,7 +2462,11 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla * event_function_call() user. */ raw_spin_lock_irq(&ctx->lock); - if (!ctx->is_active) { + /* + * Cgroup events are per-cpu events, and must IPI because of + * cgrp_cpuctx_list. + */ + if (!ctx->is_active && !is_cgroup_event(event)) { __perf_remove_from_context(event, __get_cpu_context(ctx), ctx, (void *)flags); raw_spin_unlock_irq(&ctx->lock); @@ -2482,40 +2556,6 @@ void perf_event_disable_inatomic(struct perf_event *event) irq_work_queue(&event->pending); } -static void perf_set_shadow_time(struct perf_event *event, - struct perf_event_context *ctx) -{ - /* - * use the correct time source for the time snapshot - * - * We could get by without this by leveraging the - * fact that to get to this function, the caller - * has most likely already called update_context_time() - * and update_cgrp_time_xx() and thus both timestamp - * are identical (or very close). Given that tstamp is, - * already adjusted for cgroup, we could say that: - * tstamp - ctx->timestamp - * is equivalent to - * tstamp - cgrp->timestamp. - * - * Then, in perf_output_read(), the calculation would - * work with no changes because: - * - event is guaranteed scheduled in - * - no scheduled out in between - * - thus the timestamp would be the same - * - * But this is a bit hairy. - * - * So instead, we have an explicit cgroup call to remain - * within the time source all along. We believe it - * is cleaner and simpler to understand. - */ - if (is_cgroup_event(event)) - perf_cgroup_set_shadow_time(event, event->tstamp); - else - event->shadow_ctx_time = event->tstamp - ctx->timestamp; -} - #define MAX_INTERRUPTS (~0ULL) static void perf_log_throttle(struct perf_event *event, int enable); @@ -2556,8 +2596,6 @@ event_sched_in(struct perf_event *event, perf_pmu_disable(event->pmu); - perf_set_shadow_time(event, ctx); - perf_log_itrace_start(event); if (event->pmu->add(event, PERF_EF_START)) { @@ -2861,11 +2899,14 @@ perf_install_in_context(struct perf_event_context *ctx, * perf_event_attr::disabled events will not run and can be initialized * without IPI. Except when this is the first event for the context, in * that case we need the magic of the IPI to set ctx->is_active. + * Similarly, cgroup events for the context also needs the IPI to + * manipulate the cgrp_cpuctx_list. * * The IOC_ENABLE that is sure to follow the creation of a disabled * event will issue the IPI and reprogram the hardware. 
*/ - if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && ctx->nr_events) { + if (__perf_effective_state(event) == PERF_EVENT_STATE_OFF && + ctx->nr_events && !is_cgroup_event(event)) { raw_spin_lock_irq(&ctx->lock); if (ctx->task == TASK_TOMBSTONE) { raw_spin_unlock_irq(&ctx->lock); @@ -3197,6 +3238,15 @@ static int perf_event_modify_breakpoint(struct perf_event *bp, return err; } +/* + * Copy event-type-independent attributes that may be modified. + */ +static void perf_event_modify_copy_attr(struct perf_event_attr *to, + const struct perf_event_attr *from) +{ + to->sig_data = from->sig_data; +} + static int perf_event_modify_attr(struct perf_event *event, struct perf_event_attr *attr) { @@ -3219,10 +3269,17 @@ static int perf_event_modify_attr(struct perf_event *event, WARN_ON_ONCE(event->ctx->parent_ctx); mutex_lock(&event->child_mutex); + /* + * Event-type-independent attributes must be copied before event-type + * modification, which will validate that final attributes match the + * source attributes after all relevant attributes have been copied. + */ + perf_event_modify_copy_attr(&event->attr, attr); err = func(event, attr); if (err) goto out; list_for_each_entry(child, &event->child_list, child_list) { + perf_event_modify_copy_attr(&child->attr, attr); err = func(child, attr); if (err) goto out; @@ -3251,16 +3308,6 @@ static void ctx_sched_out(struct perf_event_context *ctx, return; } - ctx->is_active &= ~event_type; - if (!(ctx->is_active & EVENT_ALL)) - ctx->is_active = 0; - - if (ctx->task) { - WARN_ON_ONCE(cpuctx->task_ctx != ctx); - if (!ctx->is_active) - cpuctx->task_ctx = NULL; - } - /* * Always update time if it was set; not only when it changes. * Otherwise we can 'forget' to update time for any but the last @@ -3274,7 +3321,22 @@ static void ctx_sched_out(struct perf_event_context *ctx, if (is_active & EVENT_TIME) { /* update (and stop) ctx time */ update_context_time(ctx); - update_cgrp_time_from_cpuctx(cpuctx); + update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx); + /* + * CPU-release for the below ->is_active store, + * see __load_acquire() in perf_event_time_now() + */ + barrier(); + } + + ctx->is_active &= ~event_type; + if (!(ctx->is_active & EVENT_ALL)) + ctx->is_active = 0; + + if (ctx->task) { + WARN_ON_ONCE(cpuctx->task_ctx != ctx); + if (!ctx->is_active) + cpuctx->task_ctx = NULL; } is_active ^= ctx->is_active; /* changed bits */ @@ -3711,13 +3773,19 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx, return 0; } +/* + * Because the userpage is strictly per-event (there is no concept of context, + * so there cannot be a context indirection), every userpage must be updated + * when context time starts :-( + * + * IOW, we must not miss EVENT_TIME edges. 
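+ *
+ * Illustrative numbers: if ctx->time was 40 when ctx->timestamp was
+ * last set to 100, the published timeoffset is 40 - 100 = -60; a
+ * lock-free reader at now = 130 computes 130 + (-60) = 70, which is
+ * exactly time + (now - timestamp). The EVENT_TIME edge in ->is_active
+ * (store-release / load-acquire) is what tells such readers to switch
+ * between this extrapolation and the frozen ctx->time.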
+ */ static inline bool event_update_userpage(struct perf_event *event) { if (likely(!atomic_read(&event->mmap_count))) return false; perf_event_update_time(event); - perf_set_shadow_time(event, event->ctx); perf_event_update_userpage(event); return true; @@ -3801,13 +3869,23 @@ ctx_sched_in(struct perf_event_context *ctx, struct task_struct *task) { int is_active = ctx->is_active; - u64 now; lockdep_assert_held(&ctx->lock); if (likely(!ctx->nr_events)) return; + if (is_active ^ EVENT_TIME) { + /* start ctx time */ + __update_context_time(ctx, false); + perf_cgroup_set_timestamp(task, ctx); + /* + * CPU-release for the below ->is_active store, + * see __load_acquire() in perf_event_time_now() + */ + barrier(); + } + ctx->is_active |= (event_type | EVENT_TIME); if (ctx->task) { if (!is_active) @@ -3818,13 +3896,6 @@ ctx_sched_in(struct perf_event_context *ctx, is_active ^= ctx->is_active; /* changed bits */ - if (is_active & EVENT_TIME) { - /* start ctx time */ - now = perf_clock(); - ctx->timestamp = now; - perf_cgroup_set_timestamp(task, ctx); - } - /* * First go through the list and put on any pinned groups * in order to give them the best chance of going on. @@ -4418,6 +4489,18 @@ static inline u64 perf_event_count(struct perf_event *event) return local64_read(&event->count) + atomic64_read(&event->child_count); } +static void calc_timer_values(struct perf_event *event, + u64 *now, + u64 *enabled, + u64 *running) +{ + u64 ctx_time; + + *now = perf_clock(); + ctx_time = perf_event_time_now(event, *now); + __perf_update_times(event, ctx_time, enabled, running); +} + /* * NMI-safe method to read a local event, that is an event that * is: @@ -4477,10 +4560,9 @@ int perf_event_read_local(struct perf_event *event, u64 *value, *value = local64_read(&event->count); if (enabled || running) { - u64 now = event->shadow_ctx_time + perf_clock(); - u64 __enabled, __running; + u64 __enabled, __running, __now; - __perf_update_times(event, now, &__enabled, &__running); + calc_timer_values(event, &__now, &__enabled, &__running); if (enabled) *enabled = __enabled; if (running) @@ -5802,18 +5884,6 @@ static int perf_event_index(struct perf_event *event) return event->pmu->event_idx(event); } -static void calc_timer_values(struct perf_event *event, - u64 *now, - u64 *enabled, - u64 *running) -{ - u64 ctx_time; - - *now = perf_clock(); - ctx_time = event->shadow_ctx_time + *now; - __perf_update_times(event, ctx_time, enabled, running); -} - static void perf_event_init_userpage(struct perf_event *event) { struct perf_event_mmap_page *userpg; @@ -5938,6 +6008,8 @@ static void ring_buffer_attach(struct perf_event *event, struct perf_buffer *old_rb = NULL; unsigned long flags; + WARN_ON_ONCE(event->parent); + if (event->rb) { /* * Should be impossible, we set this when removing @@ -5995,6 +6067,9 @@ static void ring_buffer_wakeup(struct perf_event *event) { struct perf_buffer *rb; + if (event->parent) + event = event->parent; + rcu_read_lock(); rb = rcu_dereference(event->rb); if (rb) { @@ -6008,6 +6083,9 @@ struct perf_buffer *ring_buffer_get(struct perf_event *event) { struct perf_buffer *rb; + if (event->parent) + event = event->parent; + rcu_read_lock(); rb = rcu_dereference(event->rb); if (rb) { @@ -6353,7 +6431,6 @@ accounting: ring_buffer_attach(event, rb); perf_event_update_time(event); - perf_set_shadow_time(event, event->ctx); perf_event_init_userpage(event); perf_event_update_userpage(event); } else { @@ -6717,7 +6794,7 @@ static unsigned long perf_prepare_sample_aux(struct perf_event *event, if
(WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id())) goto out; - rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler); + rb = ring_buffer_get(sampler); if (!rb) goto out; @@ -6783,7 +6860,7 @@ static void perf_aux_sample_output(struct perf_event *event, if (WARN_ON_ONCE(!sampler || !data->aux_size)) return; - rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler); + rb = ring_buffer_get(sampler); if (!rb) return; diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 9888e2bc8c76713b991ef52acf3fcff2331b250d..52501e5f765547e0fa83824c5777ad7d45cce077 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -63,7 +63,9 @@ static struct task_struct *watchdog_task; * Should we dump all CPUs backtraces in a hung task event? * Defaults to 0, can be changed via sysctl. */ -unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace; +static unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace; +#else +#define sysctl_hung_task_all_cpu_backtrace 0 #endif /* CONFIG_SMP */ /* @@ -222,11 +224,13 @@ static long hung_timeout_jiffies(unsigned long last_checked, MAX_SCHEDULE_TIMEOUT; } +#ifdef CONFIG_SYSCTL /* * Process updating of timeout sysctl */ -int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) +static int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, + void __user *buffer, + size_t *lenp, loff_t *ppos) { int ret; @@ -241,6 +245,76 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, return ret; } +/* + * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs + * and hung_task_check_interval_secs + */ +static const unsigned long hung_task_timeout_max = (LONG_MAX / HZ); +static struct ctl_table hung_task_sysctls[] = { +#ifdef CONFIG_SMP + { + .procname = "hung_task_all_cpu_backtrace", + .data = &sysctl_hung_task_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif /* CONFIG_SMP */ + { + .procname = "hung_task_panic", + .data = &sysctl_hung_task_panic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "hung_task_check_count", + .data = &sysctl_hung_task_check_count, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + }, + { + .procname = "hung_task_timeout_secs", + .data = &sysctl_hung_task_timeout_secs, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_dohung_task_timeout_secs, + .extra2 = (void *)&hung_task_timeout_max, + }, + { + .procname = "hung_task_check_interval_secs", + .data = &sysctl_hung_task_check_interval_secs, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_dohung_task_timeout_secs, + .extra2 = (void *)&hung_task_timeout_max, + }, + { + .procname = "hung_task_warnings", + .data = &sysctl_hung_task_warnings, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_NEG_ONE, + }, + {} +}; + +static void __init hung_task_sysctl_init(void) +{ + register_sysctl_init("kernel", hung_task_sysctls); +} +#else +#define hung_task_sysctl_init() do { } while (0) +#endif /* CONFIG_SYSCTL */ + + static atomic_t reset_hung_task = ATOMIC_INIT(0); void reset_hung_task_detector(void) @@ -310,6 +384,7 @@ static int __init hung_task_init(void) pm_notifier(hungtask_pm_notify, 0); 
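	/*
	 * The table registered by hung_task_sysctl_init() below surfaces
	 * these knobs under /proc/sys/kernel/; for example a write such as
	 * "echo 300 > /proc/sys/kernel/hung_task_timeout_secs" is routed
	 * through proc_dohung_task_timeout_secs() above.
	 */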
watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); + hung_task_sysctl_init(); return 0; } diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index ee595ec09778da4fd822b518a934663992db108a..623b8136e9af3b86040102392e63c1ff4a5a04aa 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -137,7 +137,7 @@ static inline int irq_select_affinity_usr(unsigned int irq) static ssize_t write_irq_affinity(int type, struct file *file, const char __user *buffer, size_t count, loff_t *pos) { - unsigned int irq = (int)(long)PDE_DATA(file_inode(file)); + unsigned int irq = (int)(long)pde_data(file_inode(file)); cpumask_var_t new_value; int err; @@ -190,12 +190,12 @@ static ssize_t irq_affinity_list_proc_write(struct file *file, static int irq_affinity_proc_open(struct inode *inode, struct file *file) { - return single_open(file, irq_affinity_proc_show, PDE_DATA(inode)); + return single_open(file, irq_affinity_proc_show, pde_data(inode)); } static int irq_affinity_list_proc_open(struct inode *inode, struct file *file) { - return single_open(file, irq_affinity_list_proc_show, PDE_DATA(inode)); + return single_open(file, irq_affinity_list_proc_show, pde_data(inode)); } static const struct proc_ops irq_affinity_proc_ops = { @@ -265,7 +265,7 @@ out: static int default_affinity_open(struct inode *inode, struct file *file) { - return single_open(file, default_affinity_show, PDE_DATA(inode)); + return single_open(file, default_affinity_show, pde_data(inode)); } static const struct proc_ops default_affinity_proc_ops = { diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 21eccc961bba31cad928b2bf6e9bfc325191e64e..94cab8c9ce56ccd012343e974d09e5d456281b3c 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -48,6 +48,9 @@ #define KPROBE_HASH_BITS 6 #define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS) +#if !defined(CONFIG_OPTPROBES) || !defined(CONFIG_SYSCTL) +#define kprobe_sysctls_init() do { } while (0) +#endif static int kprobes_initialized; /* kprobe_table can be accessed by @@ -938,10 +941,10 @@ static void unoptimize_all_kprobes(void) } static DEFINE_MUTEX(kprobe_sysctl_mutex); -int sysctl_kprobes_optimization; -int proc_kprobes_optimization_handler(struct ctl_table *table, int write, - void *buffer, size_t *length, - loff_t *ppos) +static int sysctl_kprobes_optimization; +static int proc_kprobes_optimization_handler(struct ctl_table *table, + int write, void *buffer, + size_t *length, loff_t *ppos) { int ret; @@ -957,6 +960,24 @@ int proc_kprobes_optimization_handler(struct ctl_table *table, int write, return ret; } + +static struct ctl_table kprobe_sysctls[] = { + { + .procname = "kprobes-optimization", + .data = &sysctl_kprobes_optimization, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_kprobes_optimization_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; + +static void __init kprobe_sysctls_init(void) +{ + register_sysctl_init("debug", kprobe_sysctls); +} #endif /* CONFIG_SYSCTL */ /* Put a breakpoint for a probe. 
*/ @@ -2584,6 +2605,7 @@ static int __init init_kprobes(void) err = register_module_notifier(&kprobe_module_nb); kprobes_initialized = (err == 0); + kprobe_sysctls_init(); return err; } early_initcall(init_kprobes); diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c index b562f92893727c6885b1b75081783eb48770875f..7f49baaa49793abe495bfc7f681c85d6da78c7c0 100644 --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c @@ -300,6 +300,16 @@ void __lockfunc _raw_write_lock(rwlock_t *lock) __raw_write_lock(lock); } EXPORT_SYMBOL(_raw_write_lock); + +#ifndef CONFIG_DEBUG_LOCK_ALLOC +#define __raw_write_lock_nested(lock, subclass) __raw_write_lock(((void)(subclass), (lock))) +#endif + +void __lockfunc _raw_write_lock_nested(rwlock_t *lock, int subclass) +{ + __raw_write_lock_nested(lock, subclass); +} +EXPORT_SYMBOL(_raw_write_lock_nested); #endif #ifndef CONFIG_INLINE_WRITE_LOCK_IRQSAVE diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c index 9e396a09fe0fdffa3196c1e8d581767d6f1b1292..48a19ed8486d8ef4b97e2a8ff73c222090ca9a64 100644 --- a/kernel/locking/spinlock_rt.c +++ b/kernel/locking/spinlock_rt.c @@ -239,6 +239,18 @@ void __sched rt_write_lock(rwlock_t *rwlock) } EXPORT_SYMBOL(rt_write_lock); +#ifdef CONFIG_DEBUG_LOCK_ALLOC +void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass) +{ + rtlock_might_resched(); + rwlock_acquire(&rwlock->dep_map, subclass, 0, _RET_IP_); + rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT); + rcu_read_lock(); + migrate_disable(); +} +EXPORT_SYMBOL(rt_write_lock_nested); +#endif + void __sched rt_read_unlock(rwlock_t *rwlock) { rwlock_release(&rwlock->dep_map, _RET_IP_); diff --git a/kernel/module.c b/kernel/module.c index 24dab046e16cb73d2c684efa9f29371622affd48..46a5c2ed192855c92d12bebe3bba37aef575b2f9 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3725,12 +3725,6 @@ static noinline int do_init_module(struct module *mod) } freeinit->module_init = mod->init_layout.base; - /* - * We want to find out whether @mod uses async during init. Clear - * PF_USED_ASYNC. async_schedule*() will set it. - */ - current->flags &= ~PF_USED_ASYNC; - do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) @@ -3756,22 +3750,13 @@ static noinline int do_init_module(struct module *mod) /* * We need to finish all async code before the module init sequence - * is done. This has potential to deadlock. For example, a newly - * detected block device can trigger request_module() of the - * default iosched from async probing task. Once userland helper - * reaches here, async_synchronize_full() will wait on the async - * task waiting on request_module() and deadlock. - * - * This deadlock is avoided by perfomring async_synchronize_full() - * iff module init queued any async jobs. This isn't a full - * solution as it will deadlock the same if module loading from - * async jobs nests more than once; however, due to the various - * constraints, this hack seems to be the best option for now. - * Please refer to the following thread for details. + * is done. This has potential to deadlock if synchronous module + * loading is requested from async (which is not allowed!). * - * http://thread.gmane.org/gmane.linux.kernel/1420814 + * See commit 0fdff3ec6d87 ("async, kmod: warn on synchronous + * request_module() from async workers") for more details. 
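+ *
+ * For example (hypothetical chain): an async worker running a driver
+ * probe calls request_module("foo") synchronously; foo's init then
+ * reaches this point, and waiting in async_synchronize_full() would
+ * mean waiting on the very async worker that is itself blocked
+ * waiting for "foo" to finish loading, a circular wait.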
*/ - if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC)) + if (!mod->async_probe_requested) async_synchronize_full(); ftrace_free_mem(mod, mod->init_layout.base, mod->init_layout.base + diff --git a/kernel/power/main.c b/kernel/power/main.c index 44169f3081fdca473a57d15e2b7698dd0b748353..7e646079fbeb2f3ca7a5f4bcea0cfb7ecdcb036d 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -504,7 +504,10 @@ static ssize_t pm_wakeup_irq_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return pm_wakeup_irq ? sprintf(buf, "%u\n", pm_wakeup_irq) : -ENODATA; + if (!pm_wakeup_irq()) + return -ENODATA; + + return sprintf(buf, "%u\n", pm_wakeup_irq()); } power_attr_ro(pm_wakeup_irq); diff --git a/kernel/power/process.c b/kernel/power/process.c index b7e7798637b8e6afee13d5642a58cbf88ef40411..11b570fcf0494a572b90bd0bc89deb9c621811a5 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -134,7 +134,7 @@ int freeze_processes(void) if (!pm_freezing) atomic_inc(&system_freezing_cnt); - pm_wakeup_clear(true); + pm_wakeup_clear(0); pr_info("Freezing user space processes ... "); pm_freezing = true; error = try_to_freeze_tasks(true); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index f7a98607821350ddad7c50800a26e4ef20326f51..330d499376924806ea0ca4c839c1dcd66ae66ed4 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -978,8 +978,7 @@ static void memory_bm_recycle(struct memory_bitmap *bm) * Register a range of page frames the contents of which should not be saved * during hibernation (to be used in the early initialization code). */ -void __init __register_nosave_region(unsigned long start_pfn, - unsigned long end_pfn, int use_kmalloc) +void __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn) { struct nosave_region *region; @@ -995,18 +994,12 @@ void __init __register_nosave_region(unsigned long start_pfn, goto Report; } } - if (use_kmalloc) { - /* During init, this shouldn't fail */ - region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); - BUG_ON(!region); - } else { - /* This allocation cannot fail */ - region = memblock_alloc(sizeof(struct nosave_region), - SMP_CACHE_BYTES); - if (!region) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct nosave_region)); - } + /* This allocation cannot fail */ + region = memblock_alloc(sizeof(struct nosave_region), + SMP_CACHE_BYTES); + if (!region) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(struct nosave_region)); region->start_pfn = start_pfn; region->end_pfn = end_pfn; list_add_tail(&region->list, &nosave_regions); } diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 80cc1f0f502b8591cd4cc41bc1f916b9e76a2076..6fcdee7e87a57f32187b36156d90256ee34c2f17 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -136,8 +136,6 @@ static void s2idle_loop(void) break; } - pm_wakeup_clear(false); - s2idle_enter(); } diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index 105df4dfc783971acdd1d17ee2971cb892c97d02..52571dcad768b988eaadbd3ce98a4ac42dd2f7dd 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -39,23 +39,20 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active) { struct rb_node *node; struct wakelock *wl; - char *str = buf; - char *end = buf + PAGE_SIZE; + int len = 0; mutex_lock(&wakelocks_lock); for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) { wl = rb_entry(node, struct wakelock, node); if (wl->ws->active == show_active) - str +=
scnprintf(str, end - str, "%s ", wl->name); + len += sysfs_emit_at(buf, len, "%s ", wl->name); } - if (str > buf) - str--; - str += scnprintf(str, end - str, "\n"); + len += sysfs_emit_at(buf, len, "\n"); mutex_unlock(&wakelocks_lock); - return (str - buf); + return len; } #if CONFIG_PM_WAKELOCKS_LIMIT > 0 diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index d118739874c0da81e59fca3845c5ad14264d2261..f5b388e810b9fca6acf4654bf428eabdd6761170 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -2,5 +2,8 @@ obj-y = printk.o obj-$(CONFIG_PRINTK) += printk_safe.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o -obj-$(CONFIG_PRINTK) += printk_ringbuffer.o obj-$(CONFIG_PRINTK_INDEX) += index.o + +obj-$(CONFIG_PRINTK) += printk_support.o +printk_support-y := printk_ringbuffer.o +printk_support-$(CONFIG_SYSCTL) += sysctl.o diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 9f3ed2fdb72114286ffe7f3264664615d4c161de..d947ca6c84f997d3e4e657bc2010f0e22367efe2 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -4,6 +4,14 @@ */ #include +#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) +void __init printk_sysctl_init(void); +int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +#else +#define printk_sysctl_init() do { } while (0) +#endif + #ifdef CONFIG_PRINTK /* Flags for a single printk record. */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 155229f0cf0f6dc0effff9575521520036d97b9c..82abfaf3c2aadc17c1d5dd6896c351e999fa17fd 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -171,7 +171,7 @@ static int __init control_devkmsg(char *str) __setup("printk.devkmsg=", control_devkmsg); char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit"; - +#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -210,6 +210,7 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, return 0; } +#endif /* CONFIG_PRINTK && CONFIG_SYSCTL */ /* Number of registered extended console drivers. 
 */
 static int nr_ext_console_drivers;
@@ -3211,6 +3212,7 @@ static int __init printk_late_init(void)
 	ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online",
 					console_cpu_notify, NULL);
 	WARN_ON(ret < 0);
+	printk_sysctl_init();
 	return 0;
 }
 late_initcall(printk_late_init);
diff --git a/kernel/printk/sysctl.c b/kernel/printk/sysctl.c
new file mode 100644
index 0000000000000000000000000000000000000000..c228343eeb9759a152c189814d7564608b967919
--- /dev/null
+++ b/kernel/printk/sysctl.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * sysctl.c: General linux system control interface
+ */
+
+#include <linux/sysctl.h>
+#include <linux/printk.h>
+#include <linux/capability.h>
+#include <linux/ratelimit.h>
+#include "internal.h"
+
+static const int ten_thousand = 10000;
+
+static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
+				void *buffer, size_t *lenp, loff_t *ppos)
+{
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+}
+
+static struct ctl_table printk_sysctls[] = {
+	{
+		.procname = "printk",
+		.data = &console_loglevel,
+		.maxlen = 4*sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{
+		.procname = "printk_ratelimit",
+		.data = &printk_ratelimit_state.interval,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_jiffies,
+	},
+	{
+		.procname = "printk_ratelimit_burst",
+		.data = &printk_ratelimit_state.burst,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec,
+	},
+	{
+		.procname = "printk_delay",
+		.data = &printk_delay_msec,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = (void *)&ten_thousand,
+	},
+	{
+		.procname = "printk_devkmsg",
+		.data = devkmsg_log_str,
+		.maxlen = DEVKMSG_STR_MAX_SIZE,
+		.mode = 0644,
+		.proc_handler = devkmsg_sysctl_set_loglvl,
+	},
+	{
+		.procname = "dmesg_restrict",
+		.data = &dmesg_restrict,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax_sysadmin,
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = SYSCTL_ONE,
+	},
+	{
+		.procname = "kptr_restrict",
+		.data = &kptr_restrict,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax_sysadmin,
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = SYSCTL_TWO,
+	},
+	{}
+};
+
+void __init printk_sysctl_init(void)
+{
+	register_sysctl_init("kernel", printk_sysctls);
+}
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 84f1d91604cc2f87d30f02cf04ad191dda750478..d64f0b1d8cd3ba17202c97b519aa24e712710a35 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -123,7 +123,7 @@ static struct rcu_tasks rt_name = \
 	.call_func = call, \
 	.rtpcpu = &rt_name ## __percpu, \
 	.name = n, \
-	.percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS), \
+	.percpu_enqueue_shift = ilog2(CONFIG_NR_CPUS) + 1, \
 	.percpu_enqueue_lim = 1, \
 	.percpu_dequeue_lim = 1, \
 	.barrier_q_mutex = __MUTEX_INITIALIZER(rt_name.barrier_q_mutex), \
@@ -216,6 +216,7 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
 	int cpu;
 	unsigned long flags;
 	int lim;
+	int shift;
 
 	raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
 	if (rcu_task_enqueue_lim < 0) {
@@ -229,7 +230,10 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
 	if (lim > nr_cpu_ids)
 		lim = nr_cpu_ids;
-	WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids / lim));
+	shift = ilog2(nr_cpu_ids / lim);
+	if (((nr_cpu_ids - 1) >> shift) >= lim)
+		shift++;
+	WRITE_ONCE(rtp->percpu_enqueue_shift, shift);
 	WRITE_ONCE(rtp->percpu_dequeue_lim, lim);
 	smp_store_release(&rtp->percpu_enqueue_lim,
lim); for_each_possible_cpu(cpu) { @@ -298,7 +302,7 @@ static void call_rcu_tasks_generic(struct rcu_head *rhp, rcu_callback_t func, if (unlikely(needadjust)) { raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); if (rtp->percpu_enqueue_lim != nr_cpu_ids) { - WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids)); + WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1); WRITE_ONCE(rtp->percpu_dequeue_lim, nr_cpu_ids); smp_store_release(&rtp->percpu_enqueue_lim, nr_cpu_ids); pr_info("Switching %s to per-CPU callback queuing.\n", rtp->name); @@ -413,7 +417,7 @@ static int rcu_tasks_need_gpcb(struct rcu_tasks *rtp) if (rcu_task_cb_adjust && ncbs <= rcu_task_collapse_lim) { raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags); if (rtp->percpu_enqueue_lim > 1) { - WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids)); + WRITE_ONCE(rtp->percpu_enqueue_shift, ilog2(nr_cpu_ids) + 1); smp_store_release(&rtp->percpu_enqueue_lim, 1); rtp->percpu_dequeue_gpseq = get_state_synchronize_rcu(); pr_info("Starting switch %s to CPU-0 callback queuing.\n", rtp->name); diff --git a/kernel/resource.c b/kernel/resource.c index 5ad3eba619ba755fedfce2462582e0f7c0116061..9c08d6e9eef27533baa011ed637c0b40338a0f3c 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -99,7 +99,7 @@ enum { MAX_IORES_LEVEL = 5 }; static void *r_start(struct seq_file *m, loff_t *pos) __acquires(resource_lock) { - struct resource *p = PDE_DATA(file_inode(m->file)); + struct resource *p = pde_data(file_inode(m->file)); loff_t l = 0; read_lock(&resource_lock); for (p = p->child; p && l < *pos; p = r_next(m, p, &l)) @@ -115,7 +115,7 @@ static void r_stop(struct seq_file *m, void *v) static int r_show(struct seq_file *m, void *v) { - struct resource *root = PDE_DATA(file_inode(m->file)); + struct resource *root = pde_data(file_inode(m->file)); struct resource *r = v, *p; unsigned long long start, end; int width = root->end < 0x10000 ? 4 : 8; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2e4ae00e52d14a94d890e6f086ac4b973aae2867..fcf0c180617c29417437ef98ffe9a67b713f34c2 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1214,8 +1214,9 @@ int tg_nop(struct task_group *tg, void *data) } #endif -static void set_load_weight(struct task_struct *p, bool update_load) +static void set_load_weight(struct task_struct *p) { + bool update_load = !(READ_ONCE(p->__state) & TASK_NEW); int prio = p->static_prio - MAX_RT_PRIO; struct load_weight *load = &p->se.load; @@ -4406,7 +4407,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) p->static_prio = NICE_TO_PRIO(0); p->prio = p->normal_prio = p->static_prio; - set_load_weight(p, false); + set_load_weight(p); /* * We don't need the reset flag anymore after the fork. 
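
The RCU Tasks hunks above widen the enqueue shift until the highest CPU number maps inside percpu_enqueue_lim: with a non-power-of-two CPU count, ilog2(nr_cpu_ids / lim) rounds down and cpu >> shift can index past the last queue. A stand-alone C sketch of the arithmetic (CPU counts invented; ilog2() reimplemented here to mimic the kernel helper):

#include <stdio.h>

/* Minimal stand-in for the kernel's ilog2(): floor(log2(x)), x > 0. */
static int ilog2(unsigned long x)
{
	int r = -1;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	unsigned long nr_cpu_ids = 7, lim = 3;	/* hypothetical: 7 CPUs, 3 queues */
	int shift = ilog2(nr_cpu_ids / lim);	/* ilog2(7 / 3) == ilog2(2) == 1 */
	unsigned long cpu;

	/* Unadjusted mapping would send CPU 6 to 6 >> 1 == 3 >= lim. */
	if (((nr_cpu_ids - 1) >> shift) >= lim)
		shift++;	/* the fix: widen until the last CPU fits */

	for (cpu = 0; cpu < nr_cpu_ids; cpu++)
		printf("cpu %lu -> queue %lu\n", cpu, cpu >> shift);
	return 0;
}

With 7 CPUs and 3 queues the bare shift of 1 would index queue 3; after the bump every CPU lands in queue 0 or 1, inside the limit.
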
It has @@ -5822,8 +5823,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) } if (schedstat_enabled() && rq->core->core_forceidle_count) { - if (cookie) - rq->core->core_forceidle_start = rq_clock(rq->core); + rq->core->core_forceidle_start = rq_clock(rq->core); rq->core->core_forceidle_occupation = occ; } @@ -6922,7 +6922,7 @@ void set_user_nice(struct task_struct *p, long nice) put_prev_task(rq, p); p->static_prio = NICE_TO_PRIO(nice); - set_load_weight(p, true); + set_load_weight(p); old_prio = p->prio; p->prio = effective_prio(p); @@ -7213,7 +7213,7 @@ static void __setscheduler_params(struct task_struct *p, */ p->rt_priority = attr->sched_priority; p->normal_prio = normal_prio(p); - set_load_weight(p, true); + set_load_weight(p); } /* @@ -8219,9 +8219,7 @@ int __cond_resched_lock(spinlock_t *lock) if (spin_needbreak(lock) || resched) { spin_unlock(lock); - if (resched) - preempt_schedule_common(); - else + if (!_cond_resched()) cpu_relax(); ret = 1; spin_lock(lock); @@ -8239,9 +8237,7 @@ int __cond_resched_rwlock_read(rwlock_t *lock) if (rwlock_needbreak(lock) || resched) { read_unlock(lock); - if (resched) - preempt_schedule_common(); - else + if (!_cond_resched()) cpu_relax(); ret = 1; read_lock(lock); @@ -8259,9 +8255,7 @@ int __cond_resched_rwlock_write(rwlock_t *lock) if (rwlock_needbreak(lock) || resched) { write_unlock(lock); - if (resched) - preempt_schedule_common(); - else + if (!_cond_resched()) cpu_relax(); ret = 1; write_lock(lock); @@ -9452,7 +9446,7 @@ void __init sched_init(void) #endif } - set_load_weight(&init_task, false); + set_load_weight(&init_task); /* * The boot idle thread does lazy MMU switching as well: diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c index 1fb45672ec85060d42f56d742526930e0e3c2202..c8746a9a7ada336291137da17376fce6d5529f3c 100644 --- a/kernel/sched/core_sched.c +++ b/kernel/sched/core_sched.c @@ -277,7 +277,7 @@ void __sched_core_account_forceidle(struct rq *rq) rq_i = cpu_rq(i); p = rq_i->core_pick ?: rq_i->curr; - if (!p->core_cookie) + if (p == rq_i->idle) continue; __schedstat_add(p->stats.core_forceidle_sum, delta); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 095b0aa378df0903c6f34cbd34dfcca945d87758..5146163bfabb9ee3dd7dd7bd04c900257dea6220 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3028,9 +3028,11 @@ enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) static inline void dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { - u32 divider = get_pelt_divider(&se->avg); sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg); - cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider; + sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum); + /* See update_cfs_rq_load_avg() */ + cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum, + cfs_rq->avg.load_avg * PELT_MIN_DIVIDER); } #else static inline void @@ -3381,7 +3383,6 @@ void set_task_rq_fair(struct sched_entity *se, se->avg.last_update_time = n_last_update_time; } - /* * When on migration a sched_entity joins/leaves the PELT hierarchy, we need to * propagate its contribution. The key to this propagation is the invariant @@ -3449,15 +3450,14 @@ void set_task_rq_fair(struct sched_entity *se, * XXX: only do this for the part of runnable > running ? 
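
Several fair.c hunks in this region, starting with dequeue_load_avg() above, stop rebuilding *_sum from *_avg and instead subtract the entity's own contribution, then clamp so rounding can never leave the sum below what the average implies. A toy user-space model of the sub_positive()-plus-clamp pattern (PELT_MIN_DIVIDER is the kernel's LOAD_AVG_MAX - 1024 = 46718; the sample values are made up):

#include <stdio.h>

#define PELT_MIN_DIVIDER (47742 - 1024)	/* LOAD_AVG_MAX - 1024 */

/* Model of the kernel's sub_positive(): clamp at 0 instead of wrapping. */
#define sub_positive(ptr, val) do {				\
	unsigned long __val = (val);				\
	*(ptr) = (*(ptr) > __val) ? *(ptr) - __val : 0;		\
} while (0)

int main(void)
{
	/* cfs_rq totals (invented): the sum drifted 5 below avg * divider. */
	unsigned long util_avg = 100;
	unsigned long util_sum = 100UL * PELT_MIN_DIVIDER - 5;
	unsigned long removed = 40;	/* averages of detached tasks */

	sub_positive(&util_avg, removed);
	sub_positive(&util_sum, removed * PELT_MIN_DIVIDER);

	/* The new lower bound: util_sum may never drop below
	 * util_avg * PELT_MIN_DIVIDER, whatever rounding did. */
	if (util_sum < util_avg * PELT_MIN_DIVIDER)
		util_sum = util_avg * PELT_MIN_DIVIDER;

	printf("avg=%lu sum=%lu floor=%lu\n",
	       util_avg, util_sum, util_avg * PELT_MIN_DIVIDER);
	return 0;
}
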
* */ - static inline void update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq) { - long delta = gcfs_rq->avg.util_avg - se->avg.util_avg; - u32 divider; + long delta_sum, delta_avg = gcfs_rq->avg.util_avg - se->avg.util_avg; + u32 new_sum, divider; /* Nothing to update */ - if (!delta) + if (!delta_avg) return; /* @@ -3466,23 +3466,30 @@ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq */ divider = get_pelt_divider(&cfs_rq->avg); + /* Set new sched_entity's utilization */ se->avg.util_avg = gcfs_rq->avg.util_avg; - se->avg.util_sum = se->avg.util_avg * divider; + new_sum = se->avg.util_avg * divider; + delta_sum = (long)new_sum - (long)se->avg.util_sum; + se->avg.util_sum = new_sum; /* Update parent cfs_rq utilization */ - add_positive(&cfs_rq->avg.util_avg, delta); - cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider; + add_positive(&cfs_rq->avg.util_avg, delta_avg); + add_positive(&cfs_rq->avg.util_sum, delta_sum); + + /* See update_cfs_rq_load_avg() */ + cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum, + cfs_rq->avg.util_avg * PELT_MIN_DIVIDER); } static inline void update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq) { - long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg; - u32 divider; + long delta_sum, delta_avg = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg; + u32 new_sum, divider; /* Nothing to update */ - if (!delta) + if (!delta_avg) return; /* @@ -3493,19 +3500,25 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf /* Set new sched_entity's runnable */ se->avg.runnable_avg = gcfs_rq->avg.runnable_avg; - se->avg.runnable_sum = se->avg.runnable_avg * divider; + new_sum = se->avg.runnable_avg * divider; + delta_sum = (long)new_sum - (long)se->avg.runnable_sum; + se->avg.runnable_sum = new_sum; /* Update parent cfs_rq runnable */ - add_positive(&cfs_rq->avg.runnable_avg, delta); - cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider; + add_positive(&cfs_rq->avg.runnable_avg, delta_avg); + add_positive(&cfs_rq->avg.runnable_sum, delta_sum); + /* See update_cfs_rq_load_avg() */ + cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum, + cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER); } static inline void update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq) { - long delta, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum; + long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum; unsigned long load_avg; u64 load_sum = 0; + s64 delta_sum; u32 divider; if (!runnable_sum) @@ -3532,7 +3545,7 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq * assuming all tasks are equally runnable. 
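
update_tg_cfs_util() and update_tg_cfs_runnable() now carry an explicit delta for the *_sum alongside the *_avg delta, so the parent's sum reflects what actually changed rather than being recomputed from its average. Roughly, in a reduced model (the struct, the DIVIDER value, and the numbers below are invented for illustration):

#include <stdio.h>

/* Invented, reduced view of struct sched_avg. */
struct avg {
	long util_avg;
	long util_sum;
};

#define DIVIDER 46718	/* stand-in for get_pelt_divider(&cfs_rq->avg) */

/* Reduced model of update_tg_cfs_util(): sync the group entity with the
 * child group's average and forward *both* deltas to the parent. */
static void tg_cfs_util(struct avg *parent, struct avg *se, const struct avg *gcfs)
{
	long delta_avg = gcfs->util_avg - se->util_avg;
	long new_sum, delta_sum;

	if (!delta_avg)		/* nothing to update */
		return;

	se->util_avg = gcfs->util_avg;
	new_sum = se->util_avg * DIVIDER;
	delta_sum = new_sum - se->util_sum;
	se->util_sum = new_sum;

	parent->util_avg += delta_avg;	/* the kernel uses add_positive() */
	parent->util_sum += delta_sum;
}

int main(void)
{
	struct avg parent = { 300, 300L * DIVIDER };
	struct avg se     = { 100, 100L * DIVIDER };
	struct avg gcfs   = { 140, 140L * DIVIDER };

	tg_cfs_util(&parent, &se, &gcfs);
	printf("parent: avg=%ld sum=%ld sum/avg=%ld\n",
	       parent.util_avg, parent.util_sum,
	       parent.util_sum / parent.util_avg);
	return 0;
}

After the call the parent's sum/avg ratio is still exactly DIVIDER, which is the invariant the clamps elsewhere in this patch defend.
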
 */
 	if (scale_load_down(gcfs_rq->load.weight)) {
-		load_sum = div_s64(gcfs_rq->avg.load_sum,
+		load_sum = div_u64(gcfs_rq->avg.load_sum,
 			scale_load_down(gcfs_rq->load.weight));
 	}
@@ -3549,19 +3562,22 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq
 	running_sum = se->avg.util_sum >> SCHED_CAPACITY_SHIFT;
 	runnable_sum = max(runnable_sum, running_sum);
 
-	load_sum = (s64)se_weight(se) * runnable_sum;
-	load_avg = div_s64(load_sum, divider);
-
-	se->avg.load_sum = runnable_sum;
+	load_sum = se_weight(se) * runnable_sum;
+	load_avg = div_u64(load_sum, divider);
 
-	delta = load_avg - se->avg.load_avg;
-	if (!delta)
+	delta_avg = load_avg - se->avg.load_avg;
+	if (!delta_avg)
 		return;
 
-	se->avg.load_avg = load_avg;
+	delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
 
-	add_positive(&cfs_rq->avg.load_avg, delta);
-	cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider;
+	se->avg.load_sum = runnable_sum;
+	se->avg.load_avg = load_avg;
+	add_positive(&cfs_rq->avg.load_avg, delta_avg);
+	add_positive(&cfs_rq->avg.load_sum, delta_sum);
+	/* See update_cfs_rq_load_avg() */
+	cfs_rq->avg.load_sum = max_t(u32, cfs_rq->avg.load_sum,
+				     cfs_rq->avg.load_avg * PELT_MIN_DIVIDER);
 }
 
 static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)
@@ -3652,7 +3668,7 @@ static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum
  *
  * cfs_rq->avg is used for task_h_load() and update_cfs_share() for example.
  *
- * Returns true if the load decayed or we removed load.
+ * Return: true if the load decayed or we removed load.
  *
  * Since both these conditions indicate a changed cfs_rq->avg.load we should
  * call update_tg_load_avg() when this function returns true.
@@ -3677,15 +3693,32 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
 		r = removed_load;
 		sub_positive(&sa->load_avg, r);
-		sa->load_sum = sa->load_avg * divider;
+		sub_positive(&sa->load_sum, r * divider);
+		/* See sa->util_sum below */
+		sa->load_sum = max_t(u32, sa->load_sum, sa->load_avg * PELT_MIN_DIVIDER);
 
 		r = removed_util;
 		sub_positive(&sa->util_avg, r);
-		sa->util_sum = sa->util_avg * divider;
+		sub_positive(&sa->util_sum, r * divider);
+		/*
+		 * Because of rounding, se->util_sum might end up being +1 more than
+		 * cfs->util_sum. Although this is not a problem by itself, detaching
+		 * a lot of tasks with the rounding problem between 2 updates of
+		 * util_avg (~1ms) can make cfs->util_sum become null whereas
+		 * cfs->util_avg is not.
+		 * Check that util_sum is still above its lower bound for the new
+		 * util_avg. Given that period_contrib might have moved since the last
+		 * sync, we are only sure that util_sum must be above or equal to
+		 * util_avg * minimum possible divider.
+		 */
+		sa->util_sum = max_t(u32, sa->util_sum, sa->util_avg * PELT_MIN_DIVIDER);
 
 		r = removed_runnable;
 		sub_positive(&sa->runnable_avg, r);
-		sa->runnable_sum = sa->runnable_avg * divider;
+		sub_positive(&sa->runnable_sum, r * divider);
+		/* See sa->util_sum above */
+		sa->runnable_sum = max_t(u32, sa->runnable_sum,
+					 sa->runnable_avg * PELT_MIN_DIVIDER);
 
 		/*
 		 * removed_runnable is the unweighted version of removed_load so we
@@ -3772,17 +3805,18 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s
  */
 static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	/*
-	 * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
-	 * See ___update_load_avg() for details.
- */ - u32 divider = get_pelt_divider(&cfs_rq->avg); - dequeue_load_avg(cfs_rq, se); sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg); - cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider; + sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum); + /* See update_cfs_rq_load_avg() */ + cfs_rq->avg.util_sum = max_t(u32, cfs_rq->avg.util_sum, + cfs_rq->avg.util_avg * PELT_MIN_DIVIDER); + sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg); - cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider; + sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum); + /* See update_cfs_rq_load_avg() */ + cfs_rq->avg.runnable_sum = max_t(u32, cfs_rq->avg.runnable_sum, + cfs_rq->avg.runnable_avg * PELT_MIN_DIVIDER); add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum); @@ -8539,6 +8573,8 @@ group_type group_classify(unsigned int imbalance_pct, * * If @sg does not have SMT siblings, only pull tasks if all of the SMT siblings * of @dst_cpu are idle and @sg has lower priority. + * + * Return: true if @dst_cpu can pull tasks, false otherwise. */ static bool asym_smt_can_pull_tasks(int dst_cpu, struct sd_lb_stats *sds, struct sg_lb_stats *sgs, @@ -8614,6 +8650,7 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs /** * update_sg_lb_stats - Update sched_group's statistics for load balancing. * @env: The load balancing environment. + * @sds: Load-balancing data with statistics of the local group. * @group: sched_group whose statistics are to be updated. * @sgs: variable to hold the statistics for this group. * @sg_status: Holds flag indicating the status of the sched_group @@ -9421,12 +9458,11 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s /** * find_busiest_group - Returns the busiest group within the sched_domain * if there is an imbalance. + * @env: The load balancing environment. * * Also calculates the amount of runnable load which should be moved * to restore balance. * - * @env: The load balancing environment. - * * Return: - The busiest group if imbalance exists. 
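
Several hunks above are pure kernel-doc repairs: "Returns ..." becomes a "Return:" section, @sds gets documented, and @env moves up into the parameter block so the generator attaches it correctly. For reference, the shape these fixes converge on looks like this (my_helper() is a made-up example, not kernel code):

/**
 * my_helper - Sum two values (made-up example of the layout).
 * @a: first addend.
 * @b: second addend.
 *
 * Parameter lines sit directly under the summary, before any free-form
 * description, and the return value gets its own "Return:" section so
 * the documentation generator renders it as such.
 *
 * Return: the sum of @a and @b.
 */
static inline int my_helper(int a, int b)
{
	return a + b;
}
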
*/ static struct sched_group *find_busiest_group(struct lb_env *env) diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index b5add64d9698c60813f6640d0c6a3e944f21aba1..3d2825408e3a2544fd19ace94839d8ed34e20929 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -147,11 +147,11 @@ #endif #ifdef CONFIG_RSEQ -#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK \ +#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK \ (MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ \ - | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ_BITMASK) + | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ) #else -#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK 0 +#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK 0 #endif #define MEMBARRIER_CMD_BITMASK \ @@ -159,7 +159,8 @@ | MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \ | MEMBARRIER_CMD_PRIVATE_EXPEDITED \ | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \ - | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK) + | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \ + | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK) static void ipi_mb(void *info) { diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h index e06071bf3472c4eb40744ae9be0ffeffa07c9df8..c336f5f481bca25c781991eea01f956fa60c320d 100644 --- a/kernel/sched/pelt.h +++ b/kernel/sched/pelt.h @@ -37,9 +37,11 @@ update_irq_load_avg(struct rq *rq, u64 running) } #endif +#define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024) + static inline u32 get_pelt_divider(struct sched_avg *avg) { - return LOAD_AVG_MAX - 1024 + avg->period_contrib; + return PELT_MIN_DIVIDER + avg->period_contrib; } static inline void cfs_se_util_change(struct sched_avg *avg) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index a679613a7cb748bd895d1d217d1ac55b60e6f7e5..e143581788497a4526ecf9d639530f7303a17fd1 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1082,44 +1082,6 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) return 0; } -static int psi_io_show(struct seq_file *m, void *v) -{ - return psi_show(m, &psi_system, PSI_IO); -} - -static int psi_memory_show(struct seq_file *m, void *v) -{ - return psi_show(m, &psi_system, PSI_MEM); -} - -static int psi_cpu_show(struct seq_file *m, void *v) -{ - return psi_show(m, &psi_system, PSI_CPU); -} - -static int psi_open(struct file *file, int (*psi_show)(struct seq_file *, void *)) -{ - if (file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE)) - return -EPERM; - - return single_open(file, psi_show, NULL); -} - -static int psi_io_open(struct inode *inode, struct file *file) -{ - return psi_open(file, psi_io_show); -} - -static int psi_memory_open(struct inode *inode, struct file *file) -{ - return psi_open(file, psi_memory_show); -} - -static int psi_cpu_open(struct inode *inode, struct file *file) -{ - return psi_open(file, psi_cpu_show); -} - struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, size_t nbytes, enum psi_res res) { @@ -1162,7 +1124,6 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, t->event = 0; t->last_event_time = 0; init_waitqueue_head(&t->event_wait); - kref_init(&t->refcount); mutex_lock(&group->trigger_lock); @@ -1191,15 +1152,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, return t; } -static void psi_trigger_destroy(struct kref *ref) +void psi_trigger_destroy(struct psi_trigger *t) { - struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount); - struct psi_group *group = t->group; + struct psi_group *group; struct task_struct *task_to_destroy = NULL; 
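
With the kref gone, a psi trigger now lives exactly as long as the file descriptor it was written to: created on the first successful write(), torn down in the release path, and (per the psi_write() hunk below) a second write() on the same descriptor is refused with EBUSY instead of silently replacing the trigger. A user-space sketch of the resulting contract (the threshold/window values and the 10 s poll timeout are arbitrary; writing requires CAP_SYS_RESOURCE, as psi_open() enforces):

#include <errno.h>
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Trigger: wake us when memory stalls >= 150ms within any 1s window. */
	const char trig[] = "some 150000 1000000";
	struct pollfd pfd;
	int fd;

	fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
	if (fd < 0)
		return 1;

	if (write(fd, trig, strlen(trig) + 1) < 0)
		return 1;

	/* One trigger per fd: a second write no longer replaces the first. */
	if (write(fd, trig, strlen(trig) + 1) < 0 && errno == EBUSY)
		fprintf(stderr, "second trigger refused with EBUSY\n");

	pfd.fd = fd;
	pfd.events = POLLPRI;
	if (poll(&pfd, 1, 10000) > 0 && (pfd.revents & POLLPRI))
		printf("memory pressure event\n");

	close(fd);	/* release destroys the trigger */
	return 0;
}
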
- if (static_branch_likely(&psi_disabled)) + /* + * We do not check psi_disabled since it might have been disabled after + * the trigger got created. + */ + if (!t) return; + group = t->group; /* * Wakeup waiters to stop polling. Can happen if cgroup is deleted * from under a polling process. @@ -1235,9 +1200,9 @@ static void psi_trigger_destroy(struct kref *ref) mutex_unlock(&group->trigger_lock); /* - * Wait for both *trigger_ptr from psi_trigger_replace and - * poll_task RCUs to complete their read-side critical sections - * before destroying the trigger and optionally the poll_task + * Wait for psi_schedule_poll_work RCU to complete its read-side + * critical section before destroying the trigger and optionally the + * poll_task. */ synchronize_rcu(); /* @@ -1254,18 +1219,6 @@ static void psi_trigger_destroy(struct kref *ref) kfree(t); } -void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new) -{ - struct psi_trigger *old = *trigger_ptr; - - if (static_branch_likely(&psi_disabled)) - return; - - rcu_assign_pointer(*trigger_ptr, new); - if (old) - kref_put(&old->refcount, psi_trigger_destroy); -} - __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait) { @@ -1275,27 +1228,57 @@ __poll_t psi_trigger_poll(void **trigger_ptr, if (static_branch_likely(&psi_disabled)) return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; - rcu_read_lock(); - - t = rcu_dereference(*(void __rcu __force **)trigger_ptr); - if (!t) { - rcu_read_unlock(); + t = smp_load_acquire(trigger_ptr); + if (!t) return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; - } - kref_get(&t->refcount); - - rcu_read_unlock(); poll_wait(file, &t->event_wait, wait); if (cmpxchg(&t->event, 1, 0) == 1) ret |= EPOLLPRI; - kref_put(&t->refcount, psi_trigger_destroy); - return ret; } +#ifdef CONFIG_PROC_FS +static int psi_io_show(struct seq_file *m, void *v) +{ + return psi_show(m, &psi_system, PSI_IO); +} + +static int psi_memory_show(struct seq_file *m, void *v) +{ + return psi_show(m, &psi_system, PSI_MEM); +} + +static int psi_cpu_show(struct seq_file *m, void *v) +{ + return psi_show(m, &psi_system, PSI_CPU); +} + +static int psi_open(struct file *file, int (*psi_show)(struct seq_file *, void *)) +{ + if (file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE)) + return -EPERM; + + return single_open(file, psi_show, NULL); +} + +static int psi_io_open(struct inode *inode, struct file *file) +{ + return psi_open(file, psi_io_show); +} + +static int psi_memory_open(struct inode *inode, struct file *file) +{ + return psi_open(file, psi_memory_show); +} + +static int psi_cpu_open(struct inode *inode, struct file *file) +{ + return psi_open(file, psi_cpu_show); +} + static ssize_t psi_write(struct file *file, const char __user *user_buf, size_t nbytes, enum psi_res res) { @@ -1316,14 +1299,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, buf[buf_size - 1] = '\0'; - new = psi_trigger_create(&psi_system, buf, nbytes, res); - if (IS_ERR(new)) - return PTR_ERR(new); - seq = file->private_data; + /* Take seq->lock to protect seq->private from concurrent writes */ mutex_lock(&seq->lock); - psi_trigger_replace(&seq->private, new); + + /* Allow only one trigger per file descriptor */ + if (seq->private) { + mutex_unlock(&seq->lock); + return -EBUSY; + } + + new = psi_trigger_create(&psi_system, buf, nbytes, res); + if (IS_ERR(new)) { + mutex_unlock(&seq->lock); + return PTR_ERR(new); + } + + smp_store_release(&seq->private, new); mutex_unlock(&seq->lock); return nbytes; @@ -1358,7 +1351,7 
@@ static int psi_fop_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq = file->private_data;
 
-	psi_trigger_replace(&seq->private, NULL);
+	psi_trigger_destroy(seq->private);
 	return single_release(inode, file);
 }
@@ -1400,3 +1393,5 @@ static int __init psi_proc_init(void)
 	return 0;
 }
 module_init(psi_proc_init);
+
+#endif /* CONFIG_PROC_FS */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 4d8f44a177274f58f414c2d225498f648f4f88b1..db10e73d06e028371702b745bc2652f193828a7a 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -29,6 +29,9 @@
 #include <linux/syscalls.h>
 #include <linux/sysctl.h>
 
+/* Not exposed in headers: strictly internal use only. */
+#define SECCOMP_MODE_DEAD	(SECCOMP_MODE_FILTER + 1)
+
 #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER
 #include <asm/syscall.h>
 #endif
@@ -1010,6 +1013,7 @@ static void __secure_computing_strict(int this_syscall)
 #ifdef SECCOMP_DEBUG
 	dump_stack();
 #endif
+	current->seccomp.mode = SECCOMP_MODE_DEAD;
 	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
 	do_exit(SIGKILL);
 }
@@ -1261,6 +1265,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 	case SECCOMP_RET_KILL_THREAD:
 	case SECCOMP_RET_KILL_PROCESS:
 	default:
+		current->seccomp.mode = SECCOMP_MODE_DEAD;
 		seccomp_log(this_syscall, SIGSYS, action, true);
 		/* Dump core only if this is the last remaining thread. */
 		if (action != SECCOMP_RET_KILL_THREAD ||
@@ -1309,6 +1314,11 @@ int __secure_computing(const struct seccomp_data *sd)
 		return 0;
 	case SECCOMP_MODE_FILTER:
 		return __seccomp_filter(this_syscall, sd, false);
+	/* Surviving SECCOMP_RET_KILL_* must be proactively impossible. */
+	case SECCOMP_MODE_DEAD:
+		WARN_ON_ONCE(1);
+		do_exit(SIGKILL);
+		return -1;
 	default:
 		BUG();
 	}
diff --git a/kernel/signal.c b/kernel/signal.c
index 38602738866e3254186e4336b71544dacc787449..9b04631acde8fe8f866ea4d757e61a55f2bb76fe 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1342,9 +1342,10 @@ force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t,
 	}
 	/*
 	 * Don't clear SIGNAL_UNKILLABLE for traced tasks, users won't expect
-	 * debugging to leave init killable.
+	 * debugging to leave init killable. But HANDLER_EXIT is always fatal.
 	 */
-	if (action->sa.sa_handler == SIG_DFL && !t->ptrace)
+	if (action->sa.sa_handler == SIG_DFL &&
+	    (!t->ptrace || (handler == HANDLER_EXIT)))
 		t->signal->flags &= ~SIGNAL_UNKILLABLE;
 	ret = send_signal(sig, info, t, PIDTYPE_PID);
 	spin_unlock_irqrestore(&t->sighand->siglock, flags);
diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index ce161a8e8d97585c8b1152fec1ea088343cf15d8..ddb5a7f48d69e665e5d381901f1bdc9c808f819c 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -16,11 +16,13 @@
 #ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
 #include <linux/jump_label.h>
 #include <linux/sysctl.h>
+#include <linux/init.h>
 
 static DEFINE_STATIC_KEY_FALSE(stack_erasing_bypass);
 
-int stack_erasing_sysctl(struct ctl_table *table, int write,
-			void *buffer, size_t *lenp, loff_t *ppos)
+#ifdef CONFIG_SYSCTL
+static int stack_erasing_sysctl(struct ctl_table *table, int write,
+			void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret = 0;
 	int state = !static_branch_unlikely(&stack_erasing_bypass);
@@ -42,13 +44,33 @@ int stack_erasing_sysctl(struct ctl_table *table, int write,
 		state ?
"enabled" : "disabled"); return ret; } +static struct ctl_table stackleak_sysctls[] = { + { + .procname = "stack_erasing", + .data = NULL, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = stack_erasing_sysctl, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; + +static int __init stackleak_sysctls_init(void) +{ + register_sysctl_init("kernel", stackleak_sysctls); + return 0; +} +late_initcall(stackleak_sysctls_init); +#endif /* CONFIG_SYSCTL */ #define skip_erasing() static_branch_unlikely(&stack_erasing_bypass) #else #define skip_erasing() false #endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */ -asmlinkage void notrace stackleak_erase(void) +asmlinkage void noinstr stackleak_erase(void) { /* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */ unsigned long kstack_ptr = current->lowest_stack; @@ -102,9 +124,8 @@ asmlinkage void notrace stackleak_erase(void) /* Reset the 'lowest_stack' value for the next syscall */ current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64; } -NOKPROBE_SYMBOL(stackleak_erase); -void __used __no_caller_saved_registers notrace stackleak_track_stack(void) +void __used __no_caller_saved_registers noinstr stackleak_track_stack(void) { unsigned long sp = current_stack_pointer; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ef77be575d8754d27c20c0496ba72d008aaddff6..5ae443b2882e2f4698a4a71a44be462613967308 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -20,7 +20,6 @@ */ #include -#include #include #include #include @@ -50,7 +49,6 @@ #include #include #include -#include #include #include #include @@ -58,19 +56,15 @@ #include #include #include -#include -#include #include #include #include #include #include -#include #include #include #include #include -#include #include #include #include @@ -97,65 +91,21 @@ #if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_LOCK_STAT) #include #endif -#ifdef CONFIG_CHR_DEV_SG -#include -#endif -#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE -#include -#endif -#ifdef CONFIG_LOCKUP_DETECTOR -#include -#endif #if defined(CONFIG_SYSCTL) /* Constants used for minimum and maximum */ -#ifdef CONFIG_LOCKUP_DETECTOR -static int sixty = 60; -#endif - -static int __maybe_unused neg_one = -1; -static int __maybe_unused two = 2; -static int __maybe_unused four = 4; -static unsigned long zero_ul; -static unsigned long one_ul = 1; -static unsigned long long_max = LONG_MAX; -static int one_hundred = 100; -static int two_hundred = 200; -static int one_thousand = 1000; -static int three_thousand = 3000; -#ifdef CONFIG_PRINTK -static int ten_thousand = 10000; -#endif + #ifdef CONFIG_PERF_EVENTS -static int six_hundred_forty_kb = 640 * 1024; +static const int six_hundred_forty_kb = 640 * 1024; #endif /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */ -static unsigned long dirty_bytes_min = 2 * PAGE_SIZE; - -/* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */ -static int maxolduid = 65535; -static int minolduid; +static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE; -static int ngroups_max = NGROUPS_MAX; +static const int ngroups_max = NGROUPS_MAX; static const int cap_last_cap = CAP_LAST_CAP; -/* - * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs - * and hung_task_check_interval_secs - */ -#ifdef CONFIG_DETECT_HUNG_TASK -static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); -#endif - -#ifdef CONFIG_INOTIFY_USER -#include -#endif -#ifdef CONFIG_FANOTIFY -#include -#endif - #ifdef CONFIG_PROC_SYSCTL /** @@ -192,8 +142,8 
@@ int sysctl_legacy_va_layout; #endif #ifdef CONFIG_COMPACTION -static int min_extfrag_threshold; -static int max_extfrag_threshold = 1000; +/* min_extfrag_threshold is SYSCTL_ZERO */; +static const int max_extfrag_threshold = 1000; #endif #endif /* CONFIG_SYSCTL */ @@ -804,12 +754,12 @@ static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table, return do_proc_douintvec_r(i, buffer, lenp, ppos, conv, data); } -static int do_proc_douintvec(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos, - int (*conv)(unsigned long *lvalp, - unsigned int *valp, - int write, void *data), - void *data) +int do_proc_douintvec(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos, + int (*conv)(unsigned long *lvalp, + unsigned int *valp, + int write, void *data), + void *data) { return __do_proc_douintvec(table->data, table, write, buffer, lenp, ppos, conv, data); @@ -938,17 +888,6 @@ static int proc_taint(struct ctl_table *table, int write, return err; } -#ifdef CONFIG_PRINTK -static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - if (write && !capable(CAP_SYS_ADMIN)) - return -EPERM; - - return proc_dointvec_minmax(table, write, buffer, lenp, ppos); -} -#endif - /** * struct do_proc_dointvec_minmax_conv_param - proc_dointvec_minmax() range checking structure * @min: pointer to minimum allowable value @@ -1144,67 +1083,6 @@ int proc_dou8vec_minmax(struct ctl_table *table, int write, } EXPORT_SYMBOL_GPL(proc_dou8vec_minmax); -static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, - unsigned int *valp, - int write, void *data) -{ - if (write) { - unsigned int val; - - val = round_pipe_size(*lvalp); - if (val == 0) - return -EINVAL; - - *valp = val; - } else { - unsigned int val = *valp; - *lvalp = (unsigned long) val; - } - - return 0; -} - -static int proc_dopipe_max_size(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - return do_proc_douintvec(table, write, buffer, lenp, ppos, - do_proc_dopipe_max_size_conv, NULL); -} - -static void validate_coredump_safety(void) -{ -#ifdef CONFIG_COREDUMP - if (suid_dumpable == SUID_DUMP_ROOT && - core_pattern[0] != '/' && core_pattern[0] != '|') { - printk(KERN_WARNING -"Unsafe core_pattern used with fs.suid_dumpable=2.\n" -"Pipe handler or fully qualified core dump path required.\n" -"Set kernel.core_pattern before fs.suid_dumpable.\n" - ); - } -#endif -} - -static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); - if (!error) - validate_coredump_safety(); - return error; -} - -#ifdef CONFIG_COREDUMP -static int proc_dostring_coredump(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - int error = proc_dostring(table, write, buffer, lenp, ppos); - if (!error) - validate_coredump_safety(); - return error; -} -#endif - #ifdef CONFIG_MAGIC_SYSRQ static int sysrq_sysctl_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -1267,10 +1145,11 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, err = proc_get_long(&p, &left, &val, &neg, proc_wspace_sep, sizeof(proc_wspace_sep), NULL); - if (err) + if (err || neg) { + err = -EINVAL; break; - if (neg) - continue; + } + val = convmul * val / convdiv; if ((min && val < *min) || (max && val > *max)) { err = -EINVAL; @@ 
-1928,29 +1807,6 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, -#ifdef CONFIG_COREDUMP - { - .procname = "core_uses_pid", - .data = &core_uses_pid, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "core_pattern", - .data = core_pattern, - .maxlen = CORENAME_MAX_SIZE, - .mode = 0644, - .proc_handler = proc_dostring_coredump, - }, - { - .procname = "core_pipe_limit", - .data = &core_pipe_limit, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#endif #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", @@ -1964,7 +1820,7 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &neg_one, + .extra1 = SYSCTL_NEG_ONE, .extra2 = SYSCTL_ONE, }, #endif @@ -2131,15 +1987,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dostring, }, #endif -#ifdef CONFIG_CHR_DEV_SG - { - .procname = "sg-big-buff", - .data = &sg_big_buff, - .maxlen = sizeof (int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, -#endif #ifdef CONFIG_BSD_PROCESS_ACCT { .procname = "acct", @@ -2174,31 +2021,19 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = sysctl_max_threads, }, - { - .procname = "random", - .mode = 0555, - .child = random_table, - }, { .procname = "usermodehelper", .mode = 0555, .child = usermodehelper_table, }, -#ifdef CONFIG_FW_LOADER_USER_HELPER - { - .procname = "firmware_config", - .mode = 0555, - .child = firmware_config_table, - }, -#endif { .procname = "overflowuid", .data = &overflowuid, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &minolduid, - .extra2 = &maxolduid, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_MAXOLDUID, }, { .procname = "overflowgid", @@ -2206,8 +2041,8 @@ static struct ctl_table kern_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &minolduid, - .extra2 = &maxolduid, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_MAXOLDUID, }, #ifdef CONFIG_S390 { @@ -2252,66 +2087,9 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = proc_doulongvec_minmax, }, -#if defined CONFIG_PRINTK - { - .procname = "printk", - .data = &console_loglevel, - .maxlen = 4*sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "printk_ratelimit", - .data = &printk_ratelimit_state.interval, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "printk_ratelimit_burst", - .data = &printk_ratelimit_state.burst, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "printk_delay", - .data = &printk_delay_msec, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &ten_thousand, - }, - { - .procname = "printk_devkmsg", - .data = devkmsg_log_str, - .maxlen = DEVKMSG_STR_MAX_SIZE, - .mode = 0644, - .proc_handler = devkmsg_sysctl_set_loglvl, - }, - { - .procname = "dmesg_restrict", - .data = &dmesg_restrict, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax_sysadmin, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "kptr_restrict", - .data = &kptr_restrict, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax_sysadmin, - .extra1 = SYSCTL_ZERO, - .extra2 = &two, - }, -#endif { .procname = "ngroups_max", - 
.data = &ngroups_max, + .data = (void *)&ngroups_max, .maxlen = sizeof (int), .mode = 0444, .proc_handler = proc_dointvec, @@ -2323,96 +2101,6 @@ static struct ctl_table kern_table[] = { .mode = 0444, .proc_handler = proc_dointvec, }, -#if defined(CONFIG_LOCKUP_DETECTOR) - { - .procname = "watchdog", - .data = &watchdog_user_enabled, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_watchdog, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "watchdog_thresh", - .data = &watchdog_thresh, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_watchdog_thresh, - .extra1 = SYSCTL_ZERO, - .extra2 = &sixty, - }, - { - .procname = "nmi_watchdog", - .data = &nmi_watchdog_user_enabled, - .maxlen = sizeof(int), - .mode = NMI_WATCHDOG_SYSCTL_PERM, - .proc_handler = proc_nmi_watchdog, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "watchdog_cpumask", - .data = &watchdog_cpumask_bits, - .maxlen = NR_CPUS, - .mode = 0644, - .proc_handler = proc_watchdog_cpumask, - }, -#ifdef CONFIG_SOFTLOCKUP_DETECTOR - { - .procname = "soft_watchdog", - .data = &soft_watchdog_user_enabled, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_soft_watchdog, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "softlockup_panic", - .data = &softlockup_panic, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#ifdef CONFIG_SMP - { - .procname = "softlockup_all_cpu_backtrace", - .data = &sysctl_softlockup_all_cpu_backtrace, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif /* CONFIG_SMP */ -#endif -#ifdef CONFIG_HARDLOCKUP_DETECTOR - { - .procname = "hardlockup_panic", - .data = &hardlockup_panic, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#ifdef CONFIG_SMP - { - .procname = "hardlockup_all_cpu_backtrace", - .data = &sysctl_hardlockup_all_cpu_backtrace, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif /* CONFIG_SMP */ -#endif -#endif - #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) { .procname = "unknown_nmi_panic", @@ -2515,60 +2203,6 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif -#ifdef CONFIG_DETECT_HUNG_TASK -#ifdef CONFIG_SMP - { - .procname = "hung_task_all_cpu_backtrace", - .data = &sysctl_hung_task_all_cpu_backtrace, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, -#endif /* CONFIG_SMP */ - { - .procname = "hung_task_panic", - .data = &sysctl_hung_task_panic, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "hung_task_check_count", - .data = &sysctl_hung_task_check_count, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - }, - { - .procname = "hung_task_timeout_secs", - .data = &sysctl_hung_task_timeout_secs, - .maxlen = sizeof(unsigned long), - .mode = 0644, - .proc_handler = proc_dohung_task_timeout_secs, - .extra2 = &hung_task_timeout_max, - }, - { - .procname = "hung_task_check_interval_secs", - .data = &sysctl_hung_task_check_interval_secs, - .maxlen = 
sizeof(unsigned long), - .mode = 0644, - .proc_handler = proc_dohung_task_timeout_secs, - .extra2 = &hung_task_timeout_max, - }, - { - .procname = "hung_task_warnings", - .data = &sysctl_hung_task_warnings, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &neg_one, - }, -#endif #ifdef CONFIG_RT_MUTEXES { .procname = "max_lock_depth", @@ -2628,7 +2262,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = perf_cpu_time_max_percent_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &one_hundred, + .extra2 = SYSCTL_ONE_HUNDRED, }, { .procname = "perf_event_max_stack", @@ -2637,7 +2271,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = perf_event_max_stack_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &six_hundred_forty_kb, + .extra2 = (void *)&six_hundred_forty_kb, }, { .procname = "perf_event_max_contexts_per_stack", @@ -2646,7 +2280,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = perf_event_max_stack_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &one_thousand, + .extra2 = SYSCTL_ONE_THOUSAND, }, #endif { @@ -2677,7 +2311,7 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = bpf_unpriv_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, { .procname = "bpf_stats_enabled", @@ -2708,17 +2342,6 @@ static struct ctl_table kern_table[] = { .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, -#endif -#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE - { - .procname = "stack_erasing", - .data = NULL, - .maxlen = sizeof(int), - .mode = 0600, - .proc_handler = stack_erasing_sysctl, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, #endif { } }; @@ -2731,7 +2354,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = overcommit_policy_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, { .procname = "panic_on_oom", @@ -2740,7 +2363,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = SYSCTL_TWO, }, { .procname = "oom_kill_allocating_task", @@ -2785,7 +2408,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = dirty_background_ratio_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &one_hundred, + .extra2 = SYSCTL_ONE_HUNDRED, }, { .procname = "dirty_background_bytes", @@ -2793,7 +2416,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(dirty_background_bytes), .mode = 0644, .proc_handler = dirty_background_bytes_handler, - .extra1 = &one_ul, + .extra1 = SYSCTL_LONG_ONE, }, { .procname = "dirty_ratio", @@ -2802,7 +2425,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = dirty_ratio_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &one_hundred, + .extra2 = SYSCTL_ONE_HUNDRED, }, { .procname = "dirty_bytes", @@ -2810,7 +2433,7 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(vm_dirty_bytes), .mode = 0644, .proc_handler = dirty_bytes_handler, - .extra1 = &dirty_bytes_min, + .extra1 = (void *)&dirty_bytes_min, }, { .procname = "dirty_writeback_centisecs", @@ -2842,7 +2465,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, - .extra2 = &two_hundred, + .extra2 = SYSCTL_TWO_HUNDRED, }, #ifdef CONFIG_HUGETLB_PAGE { @@ -2899,7 +2522,7 @@ static struct ctl_table vm_table[] = { .mode = 0200, .proc_handler = drop_caches_sysctl_handler, .extra1 = SYSCTL_ONE, - .extra2 = &four, + .extra2 = SYSCTL_FOUR, }, #ifdef 
CONFIG_COMPACTION { @@ -2916,7 +2539,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = compaction_proactiveness_sysctl_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &one_hundred, + .extra2 = SYSCTL_ONE_HUNDRED, }, { .procname = "extfrag_threshold", @@ -2924,8 +2547,8 @@ static struct ctl_table vm_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &min_extfrag_threshold, - .extra2 = &max_extfrag_threshold, + .extra1 = SYSCTL_ZERO, + .extra2 = (void *)&max_extfrag_threshold, }, { .procname = "compact_unevictable_allowed", @@ -2961,7 +2584,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = watermark_scale_factor_sysctl_handler, .extra1 = SYSCTL_ONE, - .extra2 = &three_thousand, + .extra2 = SYSCTL_THREE_THOUSAND, }, { .procname = "percpu_pagelist_high_fraction", @@ -3040,7 +2663,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = sysctl_min_unmapped_ratio_sysctl_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &one_hundred, + .extra2 = SYSCTL_ONE_HUNDRED, }, { .procname = "min_slab_ratio", @@ -3049,7 +2672,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = sysctl_min_slab_ratio_sysctl_handler, .extra1 = SYSCTL_ZERO, - .extra2 = &one_hundred, + .extra2 = SYSCTL_ONE_HUNDRED, }, #endif #ifdef CONFIG_SMP @@ -3183,221 +2806,6 @@ static struct ctl_table vm_table[] = { { } }; -static struct ctl_table fs_table[] = { - { - .procname = "inode-nr", - .data = &inodes_stat, - .maxlen = 2*sizeof(long), - .mode = 0444, - .proc_handler = proc_nr_inodes, - }, - { - .procname = "inode-state", - .data = &inodes_stat, - .maxlen = 7*sizeof(long), - .mode = 0444, - .proc_handler = proc_nr_inodes, - }, - { - .procname = "file-nr", - .data = &files_stat, - .maxlen = sizeof(files_stat), - .mode = 0444, - .proc_handler = proc_nr_files, - }, - { - .procname = "file-max", - .data = &files_stat.max_files, - .maxlen = sizeof(files_stat.max_files), - .mode = 0644, - .proc_handler = proc_doulongvec_minmax, - .extra1 = &zero_ul, - .extra2 = &long_max, - }, - { - .procname = "nr_open", - .data = &sysctl_nr_open, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &sysctl_nr_open_min, - .extra2 = &sysctl_nr_open_max, - }, - { - .procname = "dentry-state", - .data = &dentry_stat, - .maxlen = 6*sizeof(long), - .mode = 0444, - .proc_handler = proc_nr_dentry, - }, - { - .procname = "overflowuid", - .data = &fs_overflowuid, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &minolduid, - .extra2 = &maxolduid, - }, - { - .procname = "overflowgid", - .data = &fs_overflowgid, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &minolduid, - .extra2 = &maxolduid, - }, -#ifdef CONFIG_FILE_LOCKING - { - .procname = "leases-enable", - .data = &leases_enable, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#endif -#ifdef CONFIG_DNOTIFY - { - .procname = "dir-notify-enable", - .data = &dir_notify_enable, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#endif -#ifdef CONFIG_MMU -#ifdef CONFIG_FILE_LOCKING - { - .procname = "lease-break-time", - .data = &lease_break_time, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#endif -#ifdef CONFIG_AIO - { - .procname = "aio-nr", - .data = &aio_nr, - .maxlen = sizeof(aio_nr), - .mode = 0444, - .proc_handler = proc_doulongvec_minmax, - }, - { - 
.procname = "aio-max-nr", - .data = &aio_max_nr, - .maxlen = sizeof(aio_max_nr), - .mode = 0644, - .proc_handler = proc_doulongvec_minmax, - }, -#endif /* CONFIG_AIO */ -#ifdef CONFIG_INOTIFY_USER - { - .procname = "inotify", - .mode = 0555, - .child = inotify_table, - }, -#endif -#ifdef CONFIG_FANOTIFY - { - .procname = "fanotify", - .mode = 0555, - .child = fanotify_table, - }, -#endif -#ifdef CONFIG_EPOLL - { - .procname = "epoll", - .mode = 0555, - .child = epoll_table, - }, -#endif -#endif - { - .procname = "protected_symlinks", - .data = &sysctl_protected_symlinks, - .maxlen = sizeof(int), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "protected_hardlinks", - .data = &sysctl_protected_hardlinks, - .maxlen = sizeof(int), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, - { - .procname = "protected_fifos", - .data = &sysctl_protected_fifos, - .maxlen = sizeof(int), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &two, - }, - { - .procname = "protected_regular", - .data = &sysctl_protected_regular, - .maxlen = sizeof(int), - .mode = 0600, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = &two, - }, - { - .procname = "suid_dumpable", - .data = &suid_dumpable, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax_coredump, - .extra1 = SYSCTL_ZERO, - .extra2 = &two, - }, -#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE) - { - .procname = "binfmt_misc", - .mode = 0555, - .child = sysctl_mount_point, - }, -#endif - { - .procname = "pipe-max-size", - .data = &pipe_max_size, - .maxlen = sizeof(pipe_max_size), - .mode = 0644, - .proc_handler = proc_dopipe_max_size, - }, - { - .procname = "pipe-user-pages-hard", - .data = &pipe_user_pages_hard, - .maxlen = sizeof(pipe_user_pages_hard), - .mode = 0644, - .proc_handler = proc_doulongvec_minmax, - }, - { - .procname = "pipe-user-pages-soft", - .data = &pipe_user_pages_soft, - .maxlen = sizeof(pipe_user_pages_soft), - .mode = 0644, - .proc_handler = proc_doulongvec_minmax, - }, - { - .procname = "mount-max", - .data = &sysctl_mount_max, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ONE, - }, - { } -}; - static struct ctl_table debug_table[] = { #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE { @@ -3407,17 +2815,6 @@ static struct ctl_table debug_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, -#endif -#if defined(CONFIG_OPTPROBES) - { - .procname = "kprobes-optimization", - .data = &sysctl_kprobes_optimization, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_kprobes_optimization_handler, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, #endif { } }; @@ -3426,41 +2823,18 @@ static struct ctl_table dev_table[] = { { } }; -static struct ctl_table sysctl_base_table[] = { - { - .procname = "kernel", - .mode = 0555, - .child = kern_table, - }, - { - .procname = "vm", - .mode = 0555, - .child = vm_table, - }, - { - .procname = "fs", - .mode = 0555, - .child = fs_table, - }, - { - .procname = "debug", - .mode = 0555, - .child = debug_table, - }, - { - .procname = "dev", - .mode = 0555, - .child = dev_table, - }, - { } -}; +DECLARE_SYSCTL_BASE(kernel, kern_table); +DECLARE_SYSCTL_BASE(vm, vm_table); +DECLARE_SYSCTL_BASE(debug, debug_table); +DECLARE_SYSCTL_BASE(dev, dev_table); -int __init sysctl_init(void) +int __init 
sysctl_init_bases(void)
 {
-	struct ctl_table_header *hdr;
+	register_sysctl_base(kernel);
+	register_sysctl_base(vm);
+	register_sysctl_base(debug);
+	register_sysctl_base(dev);
 
-	hdr = register_sysctl_table(sysctl_base_table);
-	kmemleak_not_leak(hdr);
 	return 0;
 }
 #endif /* CONFIG_SYSCTL */
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index b7e52a64294849d20ef8ac8e539f5029d67d43ef..1cf73807b4503b982cbcaa835f0a4ba9ffd17d50 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -285,7 +285,7 @@ static void clocksource_verify_choose_cpus(void)
 		return;
 
 	/* Make sure to select at least one CPU other than the current CPU. */
-	cpu = cpumask_next(-1, cpu_online_mask);
+	cpu = cpumask_first(cpu_online_mask);
 	if (cpu == smp_processor_id())
 		cpu = cpumask_next(cpu, cpu_online_mask);
 	if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
@@ -307,7 +307,7 @@ static void clocksource_verify_choose_cpus(void)
 		cpu = prandom_u32() % nr_cpu_ids;
 		cpu = cpumask_next(cpu - 1, cpu_online_mask);
 		if (cpu >= nr_cpu_ids)
-			cpu = cpumask_next(-1, cpu_online_mask);
+			cpu = cpumask_first(cpu_online_mask);
 		if (!WARN_ON_ONCE(cpu >= nr_cpu_ids))
 			cpumask_set_cpu(cpu, &cpus_chosen);
 	}
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index f468767bc287f14cb456188e3b245cdec162160d..a5eb5e7fd624a033d00a6c59b9f8ce2b7c67f252 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -70,6 +70,19 @@ config HAVE_C_RECORDMCOUNT
 	help
 	  C version of recordmcount available?
 
+config HAVE_BUILDTIME_MCOUNT_SORT
+	bool
+	help
+	  An architecture selects this if it sorts the mcount_loc section
+	  at build time.
+
+config BUILDTIME_MCOUNT_SORT
+	bool
+	default y
+	depends on HAVE_BUILDTIME_MCOUNT_SORT && DYNAMIC_FTRACE
+	help
+	  Sort the mcount_loc section at build time.
+
 config TRACER_MAX_TRACE
 	bool
 
@@ -918,7 +931,7 @@ config EVENT_TRACE_TEST_SYSCALLS
 config FTRACE_SORT_STARTUP_TEST
 	bool "Verify compile time sorting of ftrace functions"
 	depends on DYNAMIC_FTRACE
-	depends on BUILDTIME_TABLE_SORT
+	depends on BUILDTIME_MCOUNT_SORT
 	help
 	  Sorting of the mcount_loc sections that are used to find where
 	  ftrace knows where to patch functions for tracing
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6163b6f762f73c939064c219cd0b1bcf49f484ef..f9feb197b2daaf348622665924795a3ae88e4fda 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -6435,10 +6435,10 @@ static int ftrace_process_locs(struct module *mod,
 
 	/*
 	 * Sorting mcount in vmlinux at build time depends on
-	 * CONFIG_BUILDTIME_TABLE_SORT, while mcount loc in
+	 * CONFIG_BUILDTIME_MCOUNT_SORT, while mcount loc in
 	 * modules cannot be sorted at build time.
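
When the build-time sort is not available (modules always, vmlinux without CONFIG_BUILDTIME_MCOUNT_SORT), ftrace_process_locs() falls back to sorting the address table at runtime. A user-space analogue of that fallback, with qsort() standing in for the kernel's sort() and invented addresses:

#include <stdio.h>
#include <stdlib.h>

/* Comparator in the spirit of ftrace_cmp_ips(): order by address,
 * computed without the overflow risk of returning x - y. */
static int cmp_ips(const void *a, const void *b)
{
	unsigned long x = *(const unsigned long *)a;
	unsigned long y = *(const unsigned long *)b;

	return (x > y) - (x < y);
}

int main(void)
{
	/* Invented "mcount_loc" entries, as a module might carry them. */
	unsigned long mcount_loc[] = { 0xffff0003, 0xffff0001, 0xffff0002 };
	size_t i, count = sizeof(mcount_loc) / sizeof(mcount_loc[0]);

	qsort(mcount_loc, count, sizeof(mcount_loc[0]), cmp_ips);
	for (i = 0; i < count; i++)
		printf("%#lx\n", mcount_loc[i]);
	return 0;
}
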
*/ - if (!IS_ENABLED(CONFIG_BUILDTIME_TABLE_SORT) || mod) { + if (!IS_ENABLED(CONFIG_BUILDTIME_MCOUNT_SORT) || mod) { sort(start, count, sizeof(*start), ftrace_cmp_ips, NULL); } else { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a569a0cb81eed7ee651615394193ed36d5df6b43..7c2578efde26d4624b117346a208efa903275a5f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -252,6 +252,10 @@ __setup("trace_clock=", set_trace_boot_clock); static int __init set_tracepoint_printk(char *str) { + /* Ignore the "tp_printk_stop_on_boot" param */ + if (*str == '_') + return 0; + if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)) tracepoint_printk = 1; return 1; @@ -7740,7 +7744,8 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr) err = kzalloc(sizeof(*err), GFP_KERNEL); if (!err) err = ERR_PTR(-ENOMEM); - tr->n_err_log_entries++; + else + tr->n_err_log_entries++; return err; } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 5e6a988a8a517e44ad86cfefd14d8bf436e49a26..ada87bfb5bb85da4430ff6cb6d3ed295a8a1a084 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -2503,6 +2503,8 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); expr->fn = hist_field_unary_minus; expr->operands[0] = operand1; + expr->size = operand1->size; + expr->is_signed = operand1->is_signed; expr->operator = FIELD_OP_UNARY_MINUS; expr->name = expr_str(expr, 0); expr->type = kstrdup_const(operand1->type, GFP_KERNEL); @@ -2719,6 +2721,7 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, /* The operand sizes should be the same, so just pick one */ expr->size = operand1->size; + expr->is_signed = operand1->is_signed; expr->operator = field_op; expr->type = kstrdup_const(operand1->type, GFP_KERNEL); @@ -3935,6 +3938,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data, var_ref_idx = find_var_ref_idx(hist_data, var_ref); if (WARN_ON(var_ref_idx < 0)) { + kfree(p); ret = var_ref_idx; goto err; } @@ -6163,7 +6167,9 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, lockdep_assert_held(&event_mutex); - if (glob && strlen(glob)) { + WARN_ON(!glob); + + if (strlen(glob)) { hist_err_clear(); last_cmd_set(file, param); } @@ -6196,7 +6202,7 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops, continue; } break; - } while (p); + } while (1); if (!p) param = NULL; diff --git a/kernel/ucount.c b/kernel/ucount.c index 7b32c356ebc5cacba2e82b8f40cceb59a9161e8c..65b597431c861aa4a949e4914f073118aa7b0567 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -190,6 +190,7 @@ struct ucounts *alloc_ucounts(struct user_namespace *ns, kuid_t uid) kfree(new); } else { hlist_add_head(&new->node, hashent); + get_user_ns(new->ns); spin_unlock_irq(&ucounts_lock); return new; } @@ -210,6 +211,7 @@ void put_ucounts(struct ucounts *ucounts) if (atomic_dec_and_lock_irqsave(&ucounts->count, &ucounts_lock, flags)) { hlist_del_init(&ucounts->node); spin_unlock_irqrestore(&ucounts_lock, flags); + put_user_ns(ucounts->ns); kfree(ucounts); } } diff --git a/kernel/watchdog.c b/kernel/watchdog.c index ad912511a0c08894ce6d2a5d1136dd5f608593c8..99afb88d2e85a8d0d14a06c53c9b90f1f6128bb4 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -740,6 +740,106 @@ int proc_watchdog_cpumask(struct ctl_table *table, int write, mutex_unlock(&watchdog_mutex); return err; } + +static const int sixty 
= 60; + +static struct ctl_table watchdog_sysctls[] = { + { + .procname = "watchdog", + .data = &watchdog_user_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_watchdog, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "watchdog_thresh", + .data = &watchdog_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_watchdog_thresh, + .extra1 = SYSCTL_ZERO, + .extra2 = (void *)&sixty, + }, + { + .procname = "nmi_watchdog", + .data = &nmi_watchdog_user_enabled, + .maxlen = sizeof(int), + .mode = NMI_WATCHDOG_SYSCTL_PERM, + .proc_handler = proc_nmi_watchdog, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "watchdog_cpumask", + .data = &watchdog_cpumask_bits, + .maxlen = NR_CPUS, + .mode = 0644, + .proc_handler = proc_watchdog_cpumask, + }, +#ifdef CONFIG_SOFTLOCKUP_DETECTOR + { + .procname = "soft_watchdog", + .data = &soft_watchdog_user_enabled, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_soft_watchdog, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "softlockup_panic", + .data = &softlockup_panic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#ifdef CONFIG_SMP + { + .procname = "softlockup_all_cpu_backtrace", + .data = &sysctl_softlockup_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif /* CONFIG_SMP */ +#endif +#ifdef CONFIG_HARDLOCKUP_DETECTOR + { + .procname = "hardlockup_panic", + .data = &hardlockup_panic, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#ifdef CONFIG_SMP + { + .procname = "hardlockup_all_cpu_backtrace", + .data = &sysctl_hardlockup_all_cpu_backtrace, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif /* CONFIG_SMP */ +#endif + {} +}; + +static void __init watchdog_sysctl_init(void) +{ + register_sysctl_init("kernel", watchdog_sysctls); +} +#else +#define watchdog_sysctl_init() do { } while (0) #endif /* CONFIG_SYSCTL */ void __init lockup_detector_init(void) @@ -753,4 +853,5 @@ void __init lockup_detector_init(void) if (!watchdog_nmi_probe()) nmi_watchdog_available = true; lockup_detector_setup(); + watchdog_sysctl_init(); } diff --git a/lib/Kconfig b/lib/Kconfig index c20b68ad2bc3e1c1b0ffdc94b0ec22d3577bc12d..c80fde816a7ee6b6eb907a1f2e7b542eba7009dc 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -65,9 +65,6 @@ config GENERIC_STRNLEN_USER config GENERIC_NET_UTILS bool -config GENERIC_FIND_FIRST_BIT - bool - source "lib/math/Kconfig" config NO_GENERIC_PCI_IOPORT_MAP @@ -673,6 +670,10 @@ config STACKDEPOT bool select STACKTRACE +config STACKDEPOT_ALWAYS_INIT + bool + select STACKDEPOT + config STACK_HASH_ORDER int "stack depot hash size (12 => 4KB, 20 => 1024KB)" range 12 20 diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index cdc842d090db388dc446e4016392e106ed8b4856..879757b6dd149547b434dccc7d5dd673e466142d 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -38,7 +38,7 @@ menuconfig KASAN CC_HAS_WORKING_NOSANITIZE_ADDRESS) || \ HAVE_ARCH_KASAN_HW_TAGS depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB) - select STACKDEPOT + select STACKDEPOT_ALWAYS_INIT help Enables KASAN (KernelAddressSANitizer) - runtime memory debugger, designed to find out-of-bounds 
accesses and use-after-free bugs. diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index 9364f79937b814d27f1f49d9c113048263cb322b..c71c09621c09cefcc18fc1987d81a6f1ee57fb6f 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -18,14 +18,14 @@ void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen) { - __blake2s_update(state, in, inlen, blake2s_compress); + __blake2s_update(state, in, inlen, false); } EXPORT_SYMBOL(blake2s_update); void blake2s_final(struct blake2s_state *state, u8 *out) { WARN_ON(IS_ENABLED(DEBUG) && !out); - __blake2s_final(state, out, blake2s_compress); + __blake2s_final(state, out, false); memzero_explicit(state, sizeof(*state)); } EXPORT_SYMBOL(blake2s_final); diff --git a/lib/find_bit.c b/lib/find_bit.c index 0f8e2e369b1d2ab87c571cf163fd77eded2af4a5..1b8e4b2a9cba8298893b6f3f4b4f3ebd928870d4 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -89,6 +89,27 @@ unsigned long _find_first_bit(const unsigned long *addr, unsigned long size) EXPORT_SYMBOL(_find_first_bit); #endif +#ifndef find_first_and_bit +/* + * Find the first set bit in two memory regions. + */ +unsigned long _find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size) +{ + unsigned long idx, val; + + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + val = addr1[idx] & addr2[idx]; + if (val) + return min(idx * BITS_PER_LONG + __ffs(val), size); + } + + return size; +} +EXPORT_SYMBOL(_find_first_and_bit); +#endif + #ifndef find_first_zero_bit /* * Find the first cleared bit in a memory region. diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c index 5637c5711db94e6ab7409fa5f16523ae0d03b064..db904b57d4b87ea68beb69c0e218bb512f5df891 100644 --- a/lib/find_bit_benchmark.c +++ b/lib/find_bit_benchmark.c @@ -49,6 +49,25 @@ static int __init test_find_first_bit(void *bitmap, unsigned long len) return 0; } +static int __init test_find_first_and_bit(void *bitmap, const void *bitmap2, unsigned long len) +{ + static DECLARE_BITMAP(cp, BITMAP_LEN) __initdata; + unsigned long i, cnt; + ktime_t time; + + bitmap_copy(cp, bitmap, BITMAP_LEN); + + time = ktime_get(); + for (cnt = i = 0; i < len; cnt++) { + i = find_first_and_bit(cp, bitmap2, len); + __clear_bit(i, cp); + } + time = ktime_get() - time; + pr_err("find_first_and_bit: %18llu ns, %6ld iterations\n", time, cnt); + + return 0; +} + static int __init test_find_next_bit(const void *bitmap, unsigned long len) { unsigned long i, cnt; @@ -129,6 +148,7 @@ static int __init find_bit_test(void) * traverse only part of bitmap to avoid soft lockup. 
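*
* (Illustration, not from the original patch: each find_first_bit()
* call here rescans from bit 0 while the loop clears one bit per pass,
* so a full-length run over the dense bitmap would be quadratic in
* BITMAP_LEN; the find_first_and_bit() test added below is capped at
* BITMAP_LEN / 2 for the same reason.)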
*/ test_find_first_bit(bitmap, BITMAP_LEN / 10); + test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN / 2); test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); pr_err("\nStart testing find_bit() with sparse bitmap\n"); @@ -145,6 +165,7 @@ static int __init find_bit_test(void) test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); test_find_first_bit(bitmap, BITMAP_LEN); + test_find_first_and_bit(bitmap, bitmap2, BITMAP_LEN); test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); /* diff --git a/lib/genalloc.c b/lib/genalloc.c index 9a57257988c77b1bda15302c6c07cf327ce84ed9..00fc50d0a640e03bcd72bf382b2814397b90b897 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -251,7 +251,7 @@ void gen_pool_destroy(struct gen_pool *pool) list_del(&chunk->next_chunk); end_bit = chunk_size(chunk) >> order; - bit = find_next_bit(chunk->bits, end_bit, 0); + bit = find_first_bit(chunk->bits, end_bit); BUG_ON(bit < end_bit); vfree(chunk); diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 6220fa67fb7e6cd2c383efe28b4b9cced6d8e70a..09d293c30fd2d38be702b6a0fa88a35a219abfed 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -488,9 +488,13 @@ void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, unsigned int users) { unsigned int wake_batch; + unsigned int min_batch; + unsigned int depth = (sbq->sb.depth + users - 1) / users; - wake_batch = clamp_val((sbq->sb.depth + users - 1) / - users, 4, SBQ_WAKE_BATCH); + min_batch = sbq->sb.depth >= (4 * SBQ_WAIT_QUEUES) ? 4 : 1; + + wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES, + min_batch, SBQ_WAKE_BATCH); __sbitmap_queue_update_wake_batch(sbq, wake_batch); } EXPORT_SYMBOL_GPL(sbitmap_queue_recalculate_wake_batch); diff --git a/lib/stackdepot.c b/lib/stackdepot.c index b437ae79aca143f6fde06693d8fd4ce2279acb5d..bf5ba9af0500961e30605d13a214318e7d4c0f80 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -161,18 +162,40 @@ static int __init is_stack_depot_disabled(char *str) } early_param("stack_depot_disable", is_stack_depot_disabled); -int __init stack_depot_init(void) +/* + * __ref because of memblock_alloc(), which will not be actually called after + * the __init code is gone, because at that point slab_is_available() is true + */ +__ref int stack_depot_init(void) { - if (!stack_depot_disable) { + static DEFINE_MUTEX(stack_depot_init_mutex); + + mutex_lock(&stack_depot_init_mutex); + if (!stack_depot_disable && !stack_table) { size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *)); int i; - stack_table = memblock_alloc(size, size); - for (i = 0; i < STACK_HASH_SIZE; i++) - stack_table[i] = NULL; + if (slab_is_available()) { + pr_info("Stack Depot allocating hash table with kvmalloc\n"); + stack_table = kvmalloc(size, GFP_KERNEL); + } else { + pr_info("Stack Depot allocating hash table with memblock_alloc\n"); + stack_table = memblock_alloc(size, SMP_CACHE_BYTES); + } + if (stack_table) { + for (i = 0; i < STACK_HASH_SIZE; i++) + stack_table[i] = NULL; + } else { + pr_err("Stack Depot hash table allocation failed, disabling\n"); + stack_depot_disable = true; + mutex_unlock(&stack_depot_init_mutex); + return -ENOMEM; + } } + mutex_unlock(&stack_depot_init_mutex); return 0; } +EXPORT_SYMBOL_GPL(stack_depot_init); /* Calculate hash for a stack */ static inline u32 hash_stack(unsigned long *entries, unsigned int size) @@ -305,6 +328,9 @@ EXPORT_SYMBOL_GPL(stack_depot_fetch); * (allocates using GFP flags of @alloc_flags). 
If @can_alloc is %false, avoids * any allocations and will fail if no space is left to store the stack trace. * + * If the stack trace in @entries is from an interrupt, only the portion up to + * interrupt entry is saved. + * * Context: Any context, but setting @can_alloc to %false is required if * alloc_pages() cannot be used from the current context. Currently * this is the case from contexts where neither %GFP_ATOMIC nor @@ -323,6 +349,16 @@ depot_stack_handle_t __stack_depot_save(unsigned long *entries, unsigned long flags; u32 hash; + /* + * If this stack trace is from an interrupt, including anything before + * interrupt entry usually leads to unbounded stackdepot growth. + * + * Because use of filter_irq_stacks() is a requirement to ensure + * stackdepot can efficiently deduplicate interrupt stacks, always + * filter_irq_stacks() to simplify all callers' use of stackdepot. + */ + nr_entries = filter_irq_stacks(entries, nr_entries); + if (unlikely(nr_entries == 0) || stack_depot_disable) goto fast_exit; diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index d33fa5a61b95845e3a3e09a876968ae0fecd0872..0c82f07f74fcba6ae31ea7603da7c57a111339ad 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -446,6 +446,42 @@ static void __init test_bitmap_parselist(void) } } +static void __init test_bitmap_printlist(void) +{ + unsigned long *bmap = kmalloc(PAGE_SIZE, GFP_KERNEL); + char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + char expected[256]; + int ret, slen; + ktime_t time; + + if (!buf || !bmap) + goto out; + + memset(bmap, -1, PAGE_SIZE); + slen = snprintf(expected, 256, "0-%ld", PAGE_SIZE * 8 - 1); + if (slen < 0) + goto out; + + time = ktime_get(); + ret = bitmap_print_to_pagebuf(true, buf, bmap, PAGE_SIZE * 8); + time = ktime_get() - time; + + if (ret != slen + 1) { + pr_err("bitmap_print_to_pagebuf: result is %d, expected %d\n", ret, slen); + goto out; + } + + if (strncmp(buf, expected, slen)) { + pr_err("bitmap_print_to_pagebuf: result is %s, expected %s\n", buf, expected); + goto out; + } + + pr_err("bitmap_print_to_pagebuf: input is '%s', Time: %llu\n", buf, time); +out: + kfree(buf); + kfree(bmap); +} + static const unsigned long parse_test[] __initconst = { BITMAP_FROM_U64(0), BITMAP_FROM_U64(1), @@ -818,6 +854,7 @@ static void __init selftest(void) test_bitmap_arr32(); test_bitmap_parse(); test_bitmap_parselist(); + test_bitmap_printlist(); test_mem_optimisations(); test_for_each_set_clump8(); test_bitmap_cut(); diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 847cdbefab461fa57c3476618f265edd775be9df..26a5c9007653aa5deabae35157cdfcf66788ca43 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -492,6 +492,7 @@ static void kmalloc_oob_in_memset(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(ptr); OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr, 0, size + KASAN_GRANULE_SIZE)); @@ -515,6 +516,7 @@ static void kmalloc_memmove_negative_size(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset((char *)ptr, 0, 64); + OPTIMIZER_HIDE_VAR(ptr); OPTIMIZER_HIDE_VAR(invalid_size); KUNIT_EXPECT_KASAN_FAIL(test, memmove((char *)ptr, (char *)ptr + 4, invalid_size)); @@ -531,6 +533,7 @@ static void kmalloc_memmove_invalid_size(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset((char *)ptr, 0, 64); + OPTIMIZER_HIDE_VAR(ptr); KUNIT_EXPECT_KASAN_FAIL(test, memmove((char *)ptr, (char *)ptr + 4, invalid_size)); kfree(ptr); @@ -893,6 +896,7 @@ static void 
kasan_memchr(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(ptr); OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, kasan_ptr_result = memchr(ptr, '1', size + 1)); @@ -919,6 +923,7 @@ static void kasan_memcmp(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset(arr, 0, sizeof(arr)); + OPTIMIZER_HIDE_VAR(ptr); OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = memcmp(ptr, arr, size+1)); diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c index 3750323973f4af802644899dd9d2dba612895f64..a5a3d6c27e1faf8f892ed65ec21b5e27277870ad 100644 --- a/lib/test_sysctl.c +++ b/lib/test_sysctl.c @@ -128,26 +128,6 @@ static struct ctl_table test_table[] = { { } }; -static struct ctl_table test_sysctl_table[] = { - { - .procname = "test_sysctl", - .maxlen = 0, - .mode = 0555, - .child = test_table, - }, - { } -}; - -static struct ctl_table test_sysctl_root_table[] = { - { - .procname = "debug", - .maxlen = 0, - .mode = 0555, - .child = test_sysctl_table, - }, - { } -}; - static struct ctl_table_header *test_sysctl_header; static int __init test_sysctl_init(void) @@ -155,7 +135,7 @@ static int __init test_sysctl_init(void) test_data.bitmap_0001 = kzalloc(SYSCTL_TEST_BITMAP_SIZE/8, GFP_KERNEL); if (!test_data.bitmap_0001) return -ENOMEM; - test_sysctl_header = register_sysctl_table(test_sysctl_root_table); + test_sysctl_header = register_sysctl("debug/test_sysctl", test_table); if (!test_sysctl_header) { kfree(test_data.bitmap_0001); return -ENOMEM; diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 53d6081f9e8b9bc4da356979e56e9cce7302f788..3b8129dd374cd96f66439a826983e2bcc63cce70 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1241,20 +1241,13 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap, struct printf_spec spec, const char *fmt) { int nr_bits = max_t(int, spec.field_width, 0); - /* current bit is 'cur', most recently seen range is [rbot, rtop] */ - int cur, rbot, rtop; bool first = true; + int rbot, rtop; if (check_pointer(&buf, end, bitmap, spec)) return buf; - rbot = cur = find_first_bit(bitmap, nr_bits); - while (cur < nr_bits) { - rtop = cur; - cur = find_next_bit(bitmap, nr_bits, cur + 1); - if (cur < nr_bits && cur <= rtop + 1) - continue; - + for_each_set_bitrange(rbot, rtop, bitmap, nr_bits) { if (!first) { if (buf < end) *buf = ','; @@ -1263,15 +1256,12 @@ char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap, first = false; buf = number(buf, end, rbot, default_dec_spec); - if (rbot < rtop) { - if (buf < end) - *buf = '-'; - buf++; - - buf = number(buf, end, rtop, default_dec_spec); - } + if (rtop == rbot + 1) + continue; - rbot = cur; + if (buf < end) + *buf = '-'; + buf = number(++buf, end, rtop - 1, default_dec_spec); } return buf; } diff --git a/mm/Kconfig b/mm/Kconfig index a99bd499ef51d480d82778398dbf4f1e196e7f20..3326ee3903f33078143607ba0a67d131d567a03f 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -444,43 +444,8 @@ config USE_PERCPU_NUMA_NODE_ID config HAVE_SETUP_PER_CPU_AREA bool -config CLEANCACHE - bool "Enable cleancache driver to cache clean pages if tmem is present" - help - Cleancache can be thought of as a page-granularity victim cache - for clean pages that the kernel's pageframe replacement algorithm - (PFRA) would like to keep around, but can't since there isn't enough - memory. 
So when the PFRA "evicts" a page, it first attempts to use - cleancache code to put the data contained in that page into - "transcendent memory", memory that is not directly accessible or - addressable by the kernel and is of unknown and possibly - time-varying size. And when a cleancache-enabled - filesystem wishes to access a page in a file on disk, it first - checks cleancache to see if it already contains it; if it does, - the page is copied into the kernel and a disk access is avoided. - When a transcendent memory driver is available (such as zcache or - Xen transcendent memory), a significant I/O reduction - may be achieved. When none is available, all cleancache calls - are reduced to a single pointer-compare-against-NULL resulting - in a negligible performance hit. - - If unsure, say Y to enable cleancache - config FRONTSWAP - bool "Enable frontswap to cache swap pages if tmem is present" - depends on SWAP - help - Frontswap is so named because it can be thought of as the opposite - of a "backing" store for a swap device. The data is stored into - "transcendent memory", memory that is not directly accessible or - addressable by the kernel and is of unknown and possibly - time-varying size. When space in transcendent memory is available, - a significant swap I/O reduction may be achieved. When none is - available, all frontswap calls are reduced to a single pointer- - compare-against-NULL resulting in a negligible performance hit - and swap data is stored as normal on the matching swap device. - - If unsure, say Y to enable frontswap. + bool config CMA bool "Contiguous Memory Allocator" @@ -545,7 +510,8 @@ config MEM_SOFT_DIRTY config ZSWAP bool "Compressed cache for swap pages (EXPERIMENTAL)" - depends on FRONTSWAP && CRYPTO=y + depends on SWAP && CRYPTO=y + select FRONTSWAP select ZPOOL help A lightweight compressed cache for swap pages. It takes diff --git a/mm/Makefile b/mm/Makefile index 588d3113f3b08b5f27b20a3532fca06bdcf40a5f..70d4309c9ce33888389210776272d1656bf3ed50 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -104,7 +104,6 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_RODATA_TEST) += rodata_test.o obj-$(CONFIG_DEBUG_VM_PGTABLE) += debug_vm_pgtable.o obj-$(CONFIG_PAGE_OWNER) += page_owner.o -obj-$(CONFIG_CLEANCACHE) += cleancache.o obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o obj-$(CONFIG_ZPOOL) += zpool.o obj-$(CONFIG_ZBUD) += zbud.o diff --git a/mm/cleancache.c b/mm/cleancache.c deleted file mode 100644 index db7eee9c08863cae43a79707afb04ce0b649d5b4..0000000000000000000000000000000000000000 --- a/mm/cleancache.c +++ /dev/null @@ -1,315 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Cleancache frontend - * - * This code provides the generic "frontend" layer to call a matching - * "backend" driver implementation of cleancache. See - * Documentation/vm/cleancache.rst for more information. - * - * Copyright (C) 2009-2010 Oracle Corp. All rights reserved. - * Author: Dan Magenheimer - */ - -#include -#include -#include -#include -#include -#include - -/* - * cleancache_ops is set by cleancache_register_ops to contain the pointers - * to the cleancache "backend" implementation functions. - */ -static const struct cleancache_ops *cleancache_ops __read_mostly; - -/* - * Counters available via /sys/kernel/debug/cleancache (if debugfs is - * properly configured. These are for information only so are not protected - * against increment races. 
- */ -static u64 cleancache_succ_gets; -static u64 cleancache_failed_gets; -static u64 cleancache_puts; -static u64 cleancache_invalidates; - -static void cleancache_register_ops_sb(struct super_block *sb, void *unused) -{ - switch (sb->cleancache_poolid) { - case CLEANCACHE_NO_BACKEND: - __cleancache_init_fs(sb); - break; - case CLEANCACHE_NO_BACKEND_SHARED: - __cleancache_init_shared_fs(sb); - break; - } -} - -/* - * Register operations for cleancache. Returns 0 on success. - */ -int cleancache_register_ops(const struct cleancache_ops *ops) -{ - if (cmpxchg(&cleancache_ops, NULL, ops)) - return -EBUSY; - - /* - * A cleancache backend can be built as a module and hence loaded after - * a cleancache enabled filesystem has called cleancache_init_fs. To - * handle such a scenario, here we call ->init_fs or ->init_shared_fs - * for each active super block. To differentiate between local and - * shared filesystems, we temporarily initialize sb->cleancache_poolid - * to CLEANCACHE_NO_BACKEND or CLEANCACHE_NO_BACKEND_SHARED - * respectively in case there is no backend registered at the time - * cleancache_init_fs or cleancache_init_shared_fs is called. - * - * Since filesystems can be mounted concurrently with cleancache - * backend registration, we have to be careful to guarantee that all - * cleancache enabled filesystems that has been mounted by the time - * cleancache_register_ops is called has got and all mounted later will - * get cleancache_poolid. This is assured by the following statements - * tied together: - * - * a) iterate_supers skips only those super blocks that has started - * ->kill_sb - * - * b) if iterate_supers encounters a super block that has not finished - * ->mount yet, it waits until it is finished - * - * c) cleancache_init_fs is called from ->mount and - * cleancache_invalidate_fs is called from ->kill_sb - * - * d) we call iterate_supers after cleancache_ops has been set - * - * From a) it follows that if iterate_supers skips a super block, then - * either the super block is already dead, in which case we do not need - * to bother initializing cleancache for it, or it was mounted after we - * initiated iterate_supers. In the latter case, it must have seen - * cleancache_ops set according to d) and initialized cleancache from - * ->mount by itself according to c). This proves that we call - * ->init_fs at least once for each active super block. - * - * From b) and c) it follows that if iterate_supers encounters a super - * block that has already started ->init_fs, it will wait until ->mount - * and hence ->init_fs has finished, then check cleancache_poolid, see - * that it has already been set and therefore do nothing. This proves - * that we call ->init_fs no more than once for each super block. - * - * Combined together, the last two paragraphs prove the function - * correctness. - * - * Note that various cleancache callbacks may proceed before this - * function is called or even concurrently with it, but since - * CLEANCACHE_NO_BACKEND is negative, they will all result in a noop - * until the corresponding ->init_fs has been actually called and - * cleancache_ops has been set. 
- */ - iterate_supers(cleancache_register_ops_sb, NULL); - return 0; -} -EXPORT_SYMBOL(cleancache_register_ops); - -/* Called by a cleancache-enabled filesystem at time of mount */ -void __cleancache_init_fs(struct super_block *sb) -{ - int pool_id = CLEANCACHE_NO_BACKEND; - - if (cleancache_ops) { - pool_id = cleancache_ops->init_fs(PAGE_SIZE); - if (pool_id < 0) - pool_id = CLEANCACHE_NO_POOL; - } - sb->cleancache_poolid = pool_id; -} -EXPORT_SYMBOL(__cleancache_init_fs); - -/* Called by a cleancache-enabled clustered filesystem at time of mount */ -void __cleancache_init_shared_fs(struct super_block *sb) -{ - int pool_id = CLEANCACHE_NO_BACKEND_SHARED; - - if (cleancache_ops) { - pool_id = cleancache_ops->init_shared_fs(&sb->s_uuid, PAGE_SIZE); - if (pool_id < 0) - pool_id = CLEANCACHE_NO_POOL; - } - sb->cleancache_poolid = pool_id; -} -EXPORT_SYMBOL(__cleancache_init_shared_fs); - -/* - * If the filesystem uses exportable filehandles, use the filehandle as - * the key, else use the inode number. - */ -static int cleancache_get_key(struct inode *inode, - struct cleancache_filekey *key) -{ - int (*fhfn)(struct inode *, __u32 *fh, int *, struct inode *); - int len = 0, maxlen = CLEANCACHE_KEY_MAX; - struct super_block *sb = inode->i_sb; - - key->u.ino = inode->i_ino; - if (sb->s_export_op != NULL) { - fhfn = sb->s_export_op->encode_fh; - if (fhfn) { - len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL); - if (len <= FILEID_ROOT || len == FILEID_INVALID) - return -1; - if (maxlen > CLEANCACHE_KEY_MAX) - return -1; - } - } - return 0; -} - -/* - * "Get" data from cleancache associated with the poolid/inode/index - * that were specified when the data was put to cleanache and, if - * successful, use it to fill the specified page with data and return 0. - * The pageframe is unchanged and returns -1 if the get fails. - * Page must be locked by caller. - * - * The function has two checks before any action is taken - whether - * a backend is registered and whether the sb->cleancache_poolid - * is correct. - */ -int __cleancache_get_page(struct page *page) -{ - int ret = -1; - int pool_id; - struct cleancache_filekey key = { .u.key = { 0 } }; - - if (!cleancache_ops) { - cleancache_failed_gets++; - goto out; - } - - VM_BUG_ON_PAGE(!PageLocked(page), page); - pool_id = page->mapping->host->i_sb->cleancache_poolid; - if (pool_id < 0) - goto out; - - if (cleancache_get_key(page->mapping->host, &key) < 0) - goto out; - - ret = cleancache_ops->get_page(pool_id, key, page->index, page); - if (ret == 0) - cleancache_succ_gets++; - else - cleancache_failed_gets++; -out: - return ret; -} -EXPORT_SYMBOL(__cleancache_get_page); - -/* - * "Put" data from a page to cleancache and associate it with the - * (previously-obtained per-filesystem) poolid and the page's, - * inode and page index. Page must be locked. Note that a put_page - * always "succeeds", though a subsequent get_page may succeed or fail. - * - * The function has two checks before any action is taken - whether - * a backend is registered and whether the sb->cleancache_poolid - * is correct. 
- */ -void __cleancache_put_page(struct page *page) -{ - int pool_id; - struct cleancache_filekey key = { .u.key = { 0 } }; - - if (!cleancache_ops) { - cleancache_puts++; - return; - } - - VM_BUG_ON_PAGE(!PageLocked(page), page); - pool_id = page->mapping->host->i_sb->cleancache_poolid; - if (pool_id >= 0 && - cleancache_get_key(page->mapping->host, &key) >= 0) { - cleancache_ops->put_page(pool_id, key, page->index, page); - cleancache_puts++; - } -} -EXPORT_SYMBOL(__cleancache_put_page); - -/* - * Invalidate any data from cleancache associated with the poolid and the - * page's inode and page index so that a subsequent "get" will fail. - * - * The function has two checks before any action is taken - whether - * a backend is registered and whether the sb->cleancache_poolid - * is correct. - */ -void __cleancache_invalidate_page(struct address_space *mapping, - struct page *page) -{ - /* careful... page->mapping is NULL sometimes when this is called */ - int pool_id = mapping->host->i_sb->cleancache_poolid; - struct cleancache_filekey key = { .u.key = { 0 } }; - - if (!cleancache_ops) - return; - - if (pool_id >= 0) { - VM_BUG_ON_PAGE(!PageLocked(page), page); - if (cleancache_get_key(mapping->host, &key) >= 0) { - cleancache_ops->invalidate_page(pool_id, - key, page->index); - cleancache_invalidates++; - } - } -} -EXPORT_SYMBOL(__cleancache_invalidate_page); - -/* - * Invalidate all data from cleancache associated with the poolid and the - * mappings's inode so that all subsequent gets to this poolid/inode - * will fail. - * - * The function has two checks before any action is taken - whether - * a backend is registered and whether the sb->cleancache_poolid - * is correct. - */ -void __cleancache_invalidate_inode(struct address_space *mapping) -{ - int pool_id = mapping->host->i_sb->cleancache_poolid; - struct cleancache_filekey key = { .u.key = { 0 } }; - - if (!cleancache_ops) - return; - - if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0) - cleancache_ops->invalidate_inode(pool_id, key); -} -EXPORT_SYMBOL(__cleancache_invalidate_inode); - -/* - * Called by any cleancache-enabled filesystem at time of unmount; - * note that pool_id is surrendered and may be returned by a subsequent - * cleancache_init_fs or cleancache_init_shared_fs. 
- */ -void __cleancache_invalidate_fs(struct super_block *sb) -{ - int pool_id; - - pool_id = sb->cleancache_poolid; - sb->cleancache_poolid = CLEANCACHE_NO_POOL; - - if (cleancache_ops && pool_id >= 0) - cleancache_ops->invalidate_fs(pool_id); -} -EXPORT_SYMBOL(__cleancache_invalidate_fs); - -static int __init init_cleancache(void) -{ -#ifdef CONFIG_DEBUG_FS - struct dentry *root = debugfs_create_dir("cleancache", NULL); - - debugfs_create_u64("succ_gets", 0444, root, &cleancache_succ_gets); - debugfs_create_u64("failed_gets", 0444, root, &cleancache_failed_gets); - debugfs_create_u64("puts", 0444, root, &cleancache_puts); - debugfs_create_u64("invalidates", 0444, root, &cleancache_invalidates); -#endif - return 0; -} -module_init(init_cleancache) diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index a7ac97c76762c449f60f6009c3926bc29f93f41e..db2abd9e415b7d8b8cdf28611360d5e234367e53 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -171,6 +171,8 @@ static void __init pte_advanced_tests(struct pgtable_debug_args *args) ptep_test_and_clear_young(args->vma, args->vaddr, args->ptep); pte = ptep_get(args->ptep); WARN_ON(pte_young(pte)); + + ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1); } static void __init pte_savedwrite_tests(struct pgtable_debug_args *args) diff --git a/mm/filemap.c b/mm/filemap.c index 2fd9b2f24025ff69b2f272cfc5aaff8e74d4c9df..ad8c39d90bf946dfca3447cae87d23684214ca1b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -34,13 +35,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include #include #include #include "internal.h" @@ -149,16 +150,6 @@ static void filemap_unaccount_folio(struct address_space *mapping, { long nr; - /* - * if we're uptodate, flush out into the cleancache, otherwise - * invalidate any existing cleancache entries. We can't leave - * stale data around in the cleancache once our page is gone - */ - if (folio_test_uptodate(folio) && folio_test_mappedtodisk(folio)) - cleancache_put_page(&folio->page); - else - cleancache_invalidate_page(mapping, &folio->page); - VM_BUG_ON_FOLIO(folio_mapped(folio), folio); if (!IS_ENABLED(CONFIG_DEBUG_VM) && unlikely(folio_mapped(folio))) { int mapcount; @@ -231,17 +222,15 @@ void __filemap_remove_folio(struct folio *folio, void *shadow) void filemap_free_folio(struct address_space *mapping, struct folio *folio) { void (*freepage)(struct page *); + int refs = 1; freepage = mapping->a_ops->freepage; if (freepage) freepage(&folio->page); - if (folio_test_large(folio) && !folio_test_hugetlb(folio)) { - folio_ref_sub(folio, folio_nr_pages(folio)); - VM_BUG_ON_FOLIO(folio_ref_count(folio) <= 0, folio); - } else { - folio_put(folio); - } + if (folio_test_large(folio) && !folio_test_hugetlb(folio)) + refs = folio_nr_pages(folio); + folio_put_refs(folio, refs); } /** @@ -1388,6 +1377,95 @@ repeat: return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR; } +#ifdef CONFIG_MIGRATION +/** + * migration_entry_wait_on_locked - Wait for a migration entry to be removed + * @entry: migration swap entry. + * @ptep: mapped pte pointer. Will return with the ptep unmapped. Only required + * for pte entries, pass NULL for pmd entries. + * @ptl: already locked ptl. This function will drop the lock. + * + * Wait for a migration entry referencing the given page to be removed. 
This is + * equivalent to put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE) except + * this can be called without taking a reference on the page. Instead this + * should be called while holding the ptl for the migration entry referencing + * the page. + * + * Returns after unmapping and unlocking the pte/ptl with pte_unmap_unlock(). + * + * This follows the same logic as folio_wait_bit_common() so see the comments + * there. + */ +void migration_entry_wait_on_locked(swp_entry_t entry, pte_t *ptep, + spinlock_t *ptl) +{ + struct wait_page_queue wait_page; + wait_queue_entry_t *wait = &wait_page.wait; + bool thrashing = false; + bool delayacct = false; + unsigned long pflags; + wait_queue_head_t *q; + struct folio *folio = page_folio(pfn_swap_entry_to_page(entry)); + + q = folio_waitqueue(folio); + if (!folio_test_uptodate(folio) && folio_test_workingset(folio)) { + if (!folio_test_swapbacked(folio)) { + delayacct_thrashing_start(); + delayacct = true; + } + psi_memstall_enter(&pflags); + thrashing = true; + } + + init_wait(wait); + wait->func = wake_page_function; + wait_page.folio = folio; + wait_page.bit_nr = PG_locked; + wait->flags = 0; + + spin_lock_irq(&q->lock); + folio_set_waiters(folio); + if (!folio_trylock_flag(folio, PG_locked, wait)) + __add_wait_queue_entry_tail(q, wait); + spin_unlock_irq(&q->lock); + + /* + * If a migration entry exists for the page the migration path must hold + * a valid reference to the page, and it must take the ptl to remove the + * migration entry. So the page is valid until the ptl is dropped. + */ + if (ptep) + pte_unmap_unlock(ptep, ptl); + else + spin_unlock(ptl); + + for (;;) { + unsigned int flags; + + set_current_state(TASK_UNINTERRUPTIBLE); + + /* Loop until we've been woken or interrupted */ + flags = smp_load_acquire(&wait->flags); + if (!(flags & WQ_FLAG_WOKEN)) { + if (signal_pending_state(TASK_UNINTERRUPTIBLE, current)) + break; + + io_schedule(); + continue; + } + break; + } + + finish_wait(q, wait); + + if (thrashing) { + if (delayacct) + delayacct_thrashing_end(); + psi_memstall_leave(&pflags); + } +} +#endif + void folio_wait_bit(struct folio *folio, int bit_nr) { folio_wait_bit_common(folio, bit_nr, TASK_UNINTERRUPTIBLE, SHARED); diff --git a/mm/frontswap.c b/mm/frontswap.c index 6bed12260dea745759f71f9c2b99fd9920331efe..6f69b044a8cc749f83592c6adbbb45f0d73545ac 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -27,27 +27,7 @@ DEFINE_STATIC_KEY_FALSE(frontswap_enabled_key); * may be registered, but implementations can never deregister. This * is a simple singly-linked list of all registered implementations. */ -static struct frontswap_ops *frontswap_ops __read_mostly; - -#define for_each_frontswap_ops(ops) \ - for ((ops) = frontswap_ops; (ops); (ops) = (ops)->next) - -/* - * If enabled, frontswap_store will return failure even on success. As - * a result, the swap subsystem will always write the page to swap, in - * effect converting frontswap into a writethrough cache. In this mode, - * there is no direct reduction in swap writes, but a frontswap backend - * can unilaterally "reclaim" any pages in use with no data loss, thus - * providing increases control over maximum memory usage due to frontswap. - */ -static bool frontswap_writethrough_enabled __read_mostly; - -/* - * If enabled, the underlying tmem implementation is capable of doing - * exclusive gets, so frontswap_load, on a successful tmem_get must - * mark the page as no longer in frontswap AND mark it dirty. 
- */ -static bool frontswap_tmem_exclusive_gets_enabled __read_mostly; +static const struct frontswap_ops *frontswap_ops __read_mostly; #ifdef CONFIG_DEBUG_FS /* @@ -114,87 +94,22 @@ static inline void inc_frontswap_invalidates(void) { } /* * Register operations for frontswap */ -void frontswap_register_ops(struct frontswap_ops *ops) +int frontswap_register_ops(const struct frontswap_ops *ops) { - DECLARE_BITMAP(a, MAX_SWAPFILES); - DECLARE_BITMAP(b, MAX_SWAPFILES); - struct swap_info_struct *si; - unsigned int i; - - bitmap_zero(a, MAX_SWAPFILES); - bitmap_zero(b, MAX_SWAPFILES); - - spin_lock(&swap_lock); - plist_for_each_entry(si, &swap_active_head, list) { - if (!WARN_ON(!si->frontswap_map)) - __set_bit(si->type, a); - } - spin_unlock(&swap_lock); - - /* the new ops needs to know the currently active swap devices */ - for_each_set_bit(i, a, MAX_SWAPFILES) - ops->init(i); - - /* - * Setting frontswap_ops must happen after the ops->init() calls - * above; cmpxchg implies smp_mb() which will ensure the init is - * complete at this point. - */ - do { - ops->next = frontswap_ops; - } while (cmpxchg(&frontswap_ops, ops->next, ops) != ops->next); + if (frontswap_ops) + return -EINVAL; + frontswap_ops = ops; static_branch_inc(&frontswap_enabled_key); - - spin_lock(&swap_lock); - plist_for_each_entry(si, &swap_active_head, list) { - if (si->frontswap_map) - __set_bit(si->type, b); - } - spin_unlock(&swap_lock); - - /* - * On the very unlikely chance that a swap device was added or - * removed between setting the "a" list bits and the ops init - * calls, we re-check and do init or invalidate for any changed - * bits. - */ - if (unlikely(!bitmap_equal(a, b, MAX_SWAPFILES))) { - for (i = 0; i < MAX_SWAPFILES; i++) { - if (!test_bit(i, a) && test_bit(i, b)) - ops->init(i); - else if (test_bit(i, a) && !test_bit(i, b)) - ops->invalidate_area(i); - } - } -} -EXPORT_SYMBOL(frontswap_register_ops); - -/* - * Enable/disable frontswap writethrough (see above). - */ -void frontswap_writethrough(bool enable) -{ - frontswap_writethrough_enabled = enable; -} -EXPORT_SYMBOL(frontswap_writethrough); - -/* - * Enable/disable frontswap exclusive gets (see above). - */ -void frontswap_tmem_exclusive_gets(bool enable) -{ - frontswap_tmem_exclusive_gets_enabled = enable; + return 0; } -EXPORT_SYMBOL(frontswap_tmem_exclusive_gets); /* * Called when a swap device is swapon'd. */ -void __frontswap_init(unsigned type, unsigned long *map) +void frontswap_init(unsigned type, unsigned long *map) { struct swap_info_struct *sis = swap_info[type]; - struct frontswap_ops *ops; VM_BUG_ON(sis == NULL); @@ -210,20 +125,16 @@ void __frontswap_init(unsigned type, unsigned long *map) * p->frontswap set to something valid to work properly. 
*/ frontswap_map_set(sis, map); - - for_each_frontswap_ops(ops) - ops->init(type); + frontswap_ops->init(type); } -EXPORT_SYMBOL(__frontswap_init); -bool __frontswap_test(struct swap_info_struct *sis, +static bool __frontswap_test(struct swap_info_struct *sis, pgoff_t offset) { if (sis->frontswap_map) return test_bit(offset, sis->frontswap_map); return false; } -EXPORT_SYMBOL(__frontswap_test); static inline void __frontswap_set(struct swap_info_struct *sis, pgoff_t offset) @@ -253,7 +164,6 @@ int __frontswap_store(struct page *page) int type = swp_type(entry); struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); - struct frontswap_ops *ops; VM_BUG_ON(!frontswap_ops); VM_BUG_ON(!PageLocked(page)); @@ -267,28 +177,19 @@ int __frontswap_store(struct page *page) */ if (__frontswap_test(sis, offset)) { __frontswap_clear(sis, offset); - for_each_frontswap_ops(ops) - ops->invalidate_page(type, offset); + frontswap_ops->invalidate_page(type, offset); } - /* Try to store in each implementation, until one succeeds. */ - for_each_frontswap_ops(ops) { - ret = ops->store(type, offset, page); - if (!ret) /* successful store */ - break; - } + ret = frontswap_ops->store(type, offset, page); if (ret == 0) { __frontswap_set(sis, offset); inc_frontswap_succ_stores(); } else { inc_frontswap_failed_stores(); } - if (frontswap_writethrough_enabled) - /* report failure so swap also writes to swap device */ - ret = -1; + return ret; } -EXPORT_SYMBOL(__frontswap_store); /* * "Get" data from frontswap associated with swaptype and offset that were @@ -302,7 +203,6 @@ int __frontswap_load(struct page *page) int type = swp_type(entry); struct swap_info_struct *sis = swap_info[type]; pgoff_t offset = swp_offset(entry); - struct frontswap_ops *ops; VM_BUG_ON(!frontswap_ops); VM_BUG_ON(!PageLocked(page)); @@ -312,21 +212,11 @@ int __frontswap_load(struct page *page) return -1; /* Try loading from each implementation, until one succeeds. 
*/ - for_each_frontswap_ops(ops) { - ret = ops->load(type, offset, page); - if (!ret) /* successful load */ - break; - } - if (ret == 0) { + ret = frontswap_ops->load(type, offset, page); + if (ret == 0) inc_frontswap_loads(); - if (frontswap_tmem_exclusive_gets_enabled) { - SetPageDirty(page); - __frontswap_clear(sis, offset); - } - } return ret; } -EXPORT_SYMBOL(__frontswap_load); /* * Invalidate any data from frontswap associated with the specified swaptype @@ -335,7 +225,6 @@ EXPORT_SYMBOL(__frontswap_load); void __frontswap_invalidate_page(unsigned type, pgoff_t offset) { struct swap_info_struct *sis = swap_info[type]; - struct frontswap_ops *ops; VM_BUG_ON(!frontswap_ops); VM_BUG_ON(sis == NULL); @@ -343,12 +232,10 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset) if (!__frontswap_test(sis, offset)) return; - for_each_frontswap_ops(ops) - ops->invalidate_page(type, offset); + frontswap_ops->invalidate_page(type, offset); __frontswap_clear(sis, offset); inc_frontswap_invalidates(); } -EXPORT_SYMBOL(__frontswap_invalidate_page); /* * Invalidate all data from frontswap associated with all offsets for the @@ -357,7 +244,6 @@ EXPORT_SYMBOL(__frontswap_invalidate_page); void __frontswap_invalidate_area(unsigned type) { struct swap_info_struct *sis = swap_info[type]; - struct frontswap_ops *ops; VM_BUG_ON(!frontswap_ops); VM_BUG_ON(sis == NULL); @@ -365,123 +251,10 @@ void __frontswap_invalidate_area(unsigned type) if (sis->frontswap_map == NULL) return; - for_each_frontswap_ops(ops) - ops->invalidate_area(type); + frontswap_ops->invalidate_area(type); atomic_set(&sis->frontswap_pages, 0); bitmap_zero(sis->frontswap_map, sis->max); } -EXPORT_SYMBOL(__frontswap_invalidate_area); - -static unsigned long __frontswap_curr_pages(void) -{ - unsigned long totalpages = 0; - struct swap_info_struct *si = NULL; - - assert_spin_locked(&swap_lock); - plist_for_each_entry(si, &swap_active_head, list) - totalpages += atomic_read(&si->frontswap_pages); - return totalpages; -} - -static int __frontswap_unuse_pages(unsigned long total, unsigned long *unused, - int *swapid) -{ - int ret = -EINVAL; - struct swap_info_struct *si = NULL; - int si_frontswap_pages; - unsigned long total_pages_to_unuse = total; - unsigned long pages = 0, pages_to_unuse = 0; - - assert_spin_locked(&swap_lock); - plist_for_each_entry(si, &swap_active_head, list) { - si_frontswap_pages = atomic_read(&si->frontswap_pages); - if (total_pages_to_unuse < si_frontswap_pages) { - pages = pages_to_unuse = total_pages_to_unuse; - } else { - pages = si_frontswap_pages; - pages_to_unuse = 0; /* unuse all */ - } - /* ensure there is enough RAM to fetch pages from frontswap */ - if (security_vm_enough_memory_mm(current->mm, pages)) { - ret = -ENOMEM; - continue; - } - vm_unacct_memory(pages); - *unused = pages_to_unuse; - *swapid = si->type; - ret = 0; - break; - } - - return ret; -} - -/* - * Used to check if it's necessary and feasible to unuse pages. - * Return 1 when nothing to do, 0 when need to shrink pages, - * error code when there is an error. 
- */ -static int __frontswap_shrink(unsigned long target_pages, - unsigned long *pages_to_unuse, - int *type) -{ - unsigned long total_pages = 0, total_pages_to_unuse; - - assert_spin_locked(&swap_lock); - - total_pages = __frontswap_curr_pages(); - if (total_pages <= target_pages) { - /* Nothing to do */ - *pages_to_unuse = 0; - return 1; - } - total_pages_to_unuse = total_pages - target_pages; - return __frontswap_unuse_pages(total_pages_to_unuse, pages_to_unuse, type); -} - -/* - * Frontswap, like a true swap device, may unnecessarily retain pages - * under certain circumstances; "shrink" frontswap is essentially a - * "partial swapoff" and works by calling try_to_unuse to attempt to - * unuse enough frontswap pages to attempt to -- subject to memory - * constraints -- reduce the number of pages in frontswap to the - * number given in the parameter target_pages. - */ -void frontswap_shrink(unsigned long target_pages) -{ - unsigned long pages_to_unuse = 0; - int type, ret; - - /* - * we don't want to hold swap_lock while doing a very - * lengthy try_to_unuse, but swap_list may change - * so restart scan from swap_active_head each time - */ - spin_lock(&swap_lock); - ret = __frontswap_shrink(target_pages, &pages_to_unuse, &type); - spin_unlock(&swap_lock); - if (ret == 0) - try_to_unuse(type, true, pages_to_unuse); - return; -} -EXPORT_SYMBOL(frontswap_shrink); - -/* - * Count and return the number of frontswap pages across all - * swap devices. This is exported so that backend drivers can - * determine current usage without reading debugfs. - */ -unsigned long frontswap_curr_pages(void) -{ - unsigned long totalpages = 0; - - spin_lock(&swap_lock); - totalpages = __frontswap_curr_pages(); - spin_unlock(&swap_lock); - - return totalpages; -} -EXPORT_SYMBOL(frontswap_curr_pages); static int __init init_frontswap(void) { diff --git a/mm/gup.c b/mm/gup.c index f0af462ac1e2b6695dda14c085a856b498188e4b..a9d4d724aef7497aba6ae6a787f20ff8dc8fa371 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -124,8 +124,8 @@ static inline struct page *try_get_compound_head(struct page *page, int refs) * considered failure, and furthermore, a likely bug in the caller, so a warning * is also emitted. */ -struct page *try_grab_compound_head(struct page *page, - int refs, unsigned int flags) +__maybe_unused struct page *try_grab_compound_head(struct page *page, + int refs, unsigned int flags) { if (flags & FOLL_GET) return try_get_compound_head(page, refs); @@ -208,10 +208,35 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags) */ bool __must_check try_grab_page(struct page *page, unsigned int flags) { - if (!(flags & (FOLL_GET | FOLL_PIN))) - return true; + WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN)); - return try_grab_compound_head(page, 1, flags); + if (flags & FOLL_GET) + return try_get_page(page); + else if (flags & FOLL_PIN) { + int refs = 1; + + page = compound_head(page); + + if (WARN_ON_ONCE(page_ref_count(page) <= 0)) + return false; + + if (hpage_pincount_available(page)) + hpage_pincount_add(page, 1); + else + refs = GUP_PIN_COUNTING_BIAS; + + /* + * Similar to try_grab_compound_head(): even if using the + * hpage_pincount_add/_sub() routines, be sure to + * *also* increment the normal page refcount field at least + * once, so that the page really is pinned. 
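+ *
+ * (Illustrative arithmetic, not from the original patch: for a page
+ * without hpage_pincount, refs above is GUP_PIN_COUNTING_BIAS, i.e.
+ * 1U << 10, so a single FOLL_PIN raises page_ref_count() by 1024;
+ * when hpage_pincount is available, the pin is recorded in
+ * hpage_pinned_refcount and the refcount below grows by only 1.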
+ */ + page_ref_add(page, refs); + + mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED, 1); + } + + return true; } /** diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 7c06db78a76cb559d0c8932ab5f7e79c8756a4fe..92196562687b61aa60ed0c047b29855b87bc5d89 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -36,7 +36,6 @@ depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc) unsigned int nr_entries; nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0); - nr_entries = filter_irq_stacks(entries, nr_entries); return __stack_depot_save(entries, nr_entries, flags, can_alloc); } diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 5ad40e3add457db1e788314e8e46eb0b2dfb4e61..13128fa1306256e617a7641fcc78fcafa40fdc5d 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -47,7 +47,8 @@ static bool kfence_enabled __read_mostly; -static unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL; +unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL; +EXPORT_SYMBOL_GPL(kfence_sample_interval); /* Export for test modules. */ #ifdef MODULE_PARAM_PREFIX #undef MODULE_PARAM_PREFIX diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c index a22b1af85577d69b9df1f404a010eec6df4d000a..50dbb815a2a8308920e7e69a2829d70240f2e520 100644 --- a/mm/kfence/kfence_test.c +++ b/mm/kfence/kfence_test.c @@ -268,13 +268,13 @@ static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocat * 100x the sample interval should be more than enough to ensure we get * a KFENCE allocation eventually. */ - timeout = jiffies + msecs_to_jiffies(100 * CONFIG_KFENCE_SAMPLE_INTERVAL); + timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval); /* * Especially for non-preemption kernels, ensure the allocation-gate * timer can catch up: after @resched_after, every failed allocation * attempt yields, to ensure the allocation-gate timer is scheduled. */ - resched_after = jiffies + msecs_to_jiffies(CONFIG_KFENCE_SAMPLE_INTERVAL); + resched_after = jiffies + msecs_to_jiffies(kfence_sample_interval); do { if (test_cache) alloc = kmem_cache_alloc(test_cache, gfp); @@ -608,7 +608,7 @@ static void test_gfpzero(struct kunit *test) int i; /* Skip if we think it'd take too long. */ - KFENCE_TEST_REQUIRES(test, CONFIG_KFENCE_SAMPLE_INTERVAL <= 100); + KFENCE_TEST_REQUIRES(test, kfence_sample_interval <= 100); setup_test_cache(test, size, 0, NULL); buf1 = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY); @@ -739,7 +739,7 @@ static void test_memcache_alloc_bulk(struct kunit *test) * 100x the sample interval should be more than enough to ensure we get * a KFENCE allocation eventually. 
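*
* (Illustration, not from the original patch: with the default sample
* interval of 100 ms, this works out to a timeout of roughly 10
* seconds.)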
*/ - timeout = jiffies + msecs_to_jiffies(100 * CONFIG_KFENCE_SAMPLE_INTERVAL); + timeout = jiffies + msecs_to_jiffies(100 * kfence_sample_interval); do { void *objects[100]; int i, num = kmem_cache_alloc_bulk(test_cache, GFP_ATOMIC, ARRAY_SIZE(objects), diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 35f14d0a00a6cdadd20f63c4feeac2d44b3c8e14..131492fd1148b4a40b28c159286955b191a5bdeb 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -1416,6 +1417,21 @@ static int khugepaged_add_pte_mapped_thp(struct mm_struct *mm, return 0; } +static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + spinlock_t *ptl; + pmd_t pmd; + + mmap_assert_write_locked(mm); + ptl = pmd_lock(vma->vm_mm, pmdp); + pmd = pmdp_collapse_flush(vma, addr, pmdp); + spin_unlock(ptl); + mm_dec_nr_ptes(mm); + page_table_check_pte_clear_range(mm, addr, pmd); + pte_free(mm, pmd_pgtable(pmd)); +} + /** * collapse_pte_mapped_thp - Try to collapse a pte-mapped THP for mm at * address haddr. @@ -1433,7 +1449,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) struct vm_area_struct *vma = find_vma(mm, haddr); struct page *hpage; pte_t *start_pte, *pte; - pmd_t *pmd, _pmd; + pmd_t *pmd; spinlock_t *ptl; int count = 0; int i; @@ -1509,12 +1525,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) } /* step 4: collapse pmd */ - ptl = pmd_lock(vma->vm_mm, pmd); - _pmd = pmdp_collapse_flush(vma, haddr, pmd); - spin_unlock(ptl); - mm_dec_nr_ptes(mm); - pte_free(mm, pmd_pgtable(_pmd)); - + collapse_and_free_pmd(mm, vma, haddr, pmd); drop_hpage: unlock_page(hpage); put_page(hpage); @@ -1552,7 +1563,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) struct vm_area_struct *vma; struct mm_struct *mm; unsigned long addr; - pmd_t *pmd, _pmd; + pmd_t *pmd; i_mmap_lock_write(mapping); vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) { @@ -1591,14 +1602,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) * reverse order. Trylock is a way to avoid deadlock. */ if (mmap_write_trylock(mm)) { - if (!khugepaged_test_exit(mm)) { - spinlock_t *ptl = pmd_lock(mm, pmd); - /* assume page table is clear */ - _pmd = pmdp_collapse_flush(vma, addr, pmd); - spin_unlock(ptl); - mm_dec_nr_ptes(mm); - pte_free(mm, pmd_pgtable(_pmd)); - } + if (!khugepaged_test_exit(mm)) + collapse_and_free_pmd(mm, vma, addr, pmd); mmap_write_unlock(mm); } else { /* Try again later */ diff --git a/mm/kmemleak.c b/mm/kmemleak.c index dc3758fdba68d412e8a5f4881acf23a75f4a2181..7580baa76af1c9948aa6af99c52de2429ee10eac 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1410,7 +1410,8 @@ static void kmemleak_scan(void) { unsigned long flags; struct kmemleak_object *object; - int i; + struct zone *zone; + int __maybe_unused i; int new_leaks = 0; jiffies_last_scan = jiffies; @@ -1450,9 +1451,9 @@ static void kmemleak_scan(void) * Struct page scanning for each node. 
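*
* (Illustration, not from the original patch: walking populated zones
* instead of whole node pfn ranges means potentially huge holes
* between a node's zones are no longer scanned.)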
*/ get_online_mems(); - for_each_online_node(i) { - unsigned long start_pfn = node_start_pfn(i); - unsigned long end_pfn = node_end_pfn(i); + for_each_populated_zone(zone) { + unsigned long start_pfn = zone->zone_start_pfn; + unsigned long end_pfn = zone_end_pfn(zone); unsigned long pfn; for (pfn = start_pfn; pfn < end_pfn; pfn++) { @@ -1461,8 +1462,8 @@ static void kmemleak_scan(void) if (!page) continue; - /* only scan pages belonging to this node */ - if (page_to_nid(page) != i) + /* only scan pages belonging to this zone */ + if (page_zone(page) != zone) continue; /* only scan if page is in use */ if (page_count(page) == 0) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 09d342c7cbd0d9f23b3c916c8dab2132e1d35ffe..36e9f38c919d0976494b96f60f4d1bcee09cf54d 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -254,7 +254,7 @@ struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr) } #ifdef CONFIG_MEMCG_KMEM -extern spinlock_t css_set_lock; +static DEFINE_SPINLOCK(objcg_lock); bool mem_cgroup_kmem_disabled(void) { @@ -298,9 +298,9 @@ static void obj_cgroup_release(struct percpu_ref *ref) if (nr_pages) obj_cgroup_uncharge_pages(objcg, nr_pages); - spin_lock_irqsave(&css_set_lock, flags); + spin_lock_irqsave(&objcg_lock, flags); list_del(&objcg->list); - spin_unlock_irqrestore(&css_set_lock, flags); + spin_unlock_irqrestore(&objcg_lock, flags); percpu_ref_exit(ref); kfree_rcu(objcg, rcu); @@ -332,7 +332,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg, objcg = rcu_replace_pointer(memcg->objcg, NULL, true); - spin_lock_irq(&css_set_lock); + spin_lock_irq(&objcg_lock); /* 1) Ready to reparent active objcg. */ list_add(&objcg->list, &memcg->objcg_list); @@ -342,7 +342,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg, /* 3) Move already reparented objcgs to the parent's list */ list_splice(&memcg->objcg_list, &parent->objcg_list); - spin_unlock_irq(&css_set_lock); + spin_unlock_irq(&objcg_lock); percpu_ref_kill(&objcg->refcnt); } diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 14ae5c18e77668b431d6a1123108e5ddc3ddeaa6..97a9ed8f87a96ac0a3e0ef02725b168092e7a37a 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1595,6 +1595,12 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, goto out; } + /* + * Pages instantiated by device-dax (not filesystem-dax) + * may be compound pages. + */ + page = compound_head(page); + /* * Prevent the inode from being freed while we are interrogating * the address_space, typically this would be handled by diff --git a/mm/migrate.c b/mm/migrate.c index 18ce840914f0d9b1b5bee825a0a0bd260899e583..c7da064b4781b80c46f8c4e18abecfddf81b2283 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -291,7 +291,6 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, { pte_t pte; swp_entry_t entry; - struct folio *folio; spin_lock(ptl); pte = *ptep; @@ -302,17 +301,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, if (!is_migration_entry(entry)) goto out; - folio = page_folio(pfn_swap_entry_to_page(entry)); - - /* - * Once page cache replacement of page migration started, page_count - * is zero; but we must not call folio_put_wait_locked() without - * a ref. Use folio_try_get(), and just fault again if it fails. 
- */ - if (!folio_try_get(folio)) - goto out; - pte_unmap_unlock(ptep, ptl); - folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE); + migration_entry_wait_on_locked(entry, ptep, ptl); return; out: pte_unmap_unlock(ptep, ptl); @@ -337,16 +326,11 @@ void migration_entry_wait_huge(struct vm_area_struct *vma, void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) { spinlock_t *ptl; - struct folio *folio; ptl = pmd_lock(mm, pmd); if (!is_pmd_migration_entry(*pmd)) goto unlock; - folio = page_folio(pfn_swap_entry_to_page(pmd_to_swp_entry(*pmd))); - if (!folio_try_get(folio)) - goto unlock; - spin_unlock(ptl); - folio_put_wait_locked(folio, TASK_UNINTERRUPTIBLE); + migration_entry_wait_on_locked(pmd_to_swp_entry(*pmd), NULL, ptl); return; unlock: spin_unlock(ptl); @@ -2431,22 +2415,8 @@ static bool migrate_vma_check_page(struct page *page) return false; /* Page from ZONE_DEVICE have one extra reference */ - if (is_zone_device_page(page)) { - /* - * Private page can never be pin as they have no valid pte and - * GUP will fail for those. Yet if there is a pending migration - * a thread might try to wait on the pte migration entry and - * will bump the page reference count. Sadly there is no way to - * differentiate a regular pin from migration wait. Hence to - * avoid 2 racing thread trying to migrate back to CPU to enter - * infinite loop (one stopping migration because the other is - * waiting on pte migration entry). We always return true here. - * - * FIXME proper solution is to rework migration_entry_wait() so - * it does not need to take a reference on page. - */ - return is_device_private_page(page); - } + if (is_zone_device_page(page)) + extra++; /* For file back page */ if (page_mapping(page)) diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 6a0ddda6b3c53b4e5baf421ec9c8b2b215a7de90..f67c4c70f17f66c08626f57b1999462782395eff 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -115,7 +115,7 @@ static void unset_migratetype_isolate(struct page *page, unsigned migratetype) * onlining - just onlined memory won't immediately be considered for * allocation. 
*/ - if (!isolated_page && PageBuddy(page)) { + if (!isolated_page) { nr_pages = move_freepages_block(zone, page, migratetype, NULL); __mod_zone_freepage_state(zone, nr_pages, migratetype); } diff --git a/mm/page_owner.c b/mm/page_owner.c index 5eea061bb1e5994361debbd7deca28a170b4bcd6..99e360df9465237f1899f4f58958573ac0fa781a 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -80,6 +80,8 @@ static __init void init_page_owner(void) if (!page_owner_enabled) return; + stack_depot_init(); + register_dummy_stack(); register_failure_stack(); register_early_stack(); diff --git a/mm/page_table_check.c b/mm/page_table_check.c index 7504e7caa2a1af7e0417db21a64cf1f50bf265d0..3763bd077861a52a785b6b96192542578aaafef9 100644 --- a/mm/page_table_check.c +++ b/mm/page_table_check.c @@ -86,8 +86,8 @@ static void page_table_check_clear(struct mm_struct *mm, unsigned long addr, { struct page_ext *page_ext; struct page *page; + unsigned long i; bool anon; - int i; if (!pfn_valid(pfn)) return; @@ -121,8 +121,8 @@ static void page_table_check_set(struct mm_struct *mm, unsigned long addr, { struct page_ext *page_ext; struct page *page; + unsigned long i; bool anon; - int i; if (!pfn_valid(pfn)) return; @@ -152,10 +152,10 @@ static void page_table_check_set(struct mm_struct *mm, unsigned long addr, void __page_table_check_zero(struct page *page, unsigned int order) { struct page_ext *page_ext = lookup_page_ext(page); - int i; + unsigned long i; BUG_ON(!page_ext); - for (i = 0; i < (1 << order); i++) { + for (i = 0; i < (1ul << order); i++) { struct page_table_check *ptc = get_page_table_check(page_ext); BUG_ON(atomic_read(&ptc->anon_map_count)); @@ -206,17 +206,10 @@ EXPORT_SYMBOL(__page_table_check_pud_clear); void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { - pte_t old_pte; - if (&init_mm == mm) return; - old_pte = *ptep; - if (pte_user_accessible_page(old_pte)) { - page_table_check_clear(mm, addr, pte_pfn(old_pte), - PAGE_SIZE >> PAGE_SHIFT); - } - + __page_table_check_pte_clear(mm, addr, *ptep); if (pte_user_accessible_page(pte)) { page_table_check_set(mm, addr, pte_pfn(pte), PAGE_SIZE >> PAGE_SHIFT, @@ -228,17 +221,10 @@ EXPORT_SYMBOL(__page_table_check_pte_set); void __page_table_check_pmd_set(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { - pmd_t old_pmd; - if (&init_mm == mm) return; - old_pmd = *pmdp; - if (pmd_user_accessible_page(old_pmd)) { - page_table_check_clear(mm, addr, pmd_pfn(old_pmd), - PMD_PAGE_SIZE >> PAGE_SHIFT); - } - + __page_table_check_pmd_clear(mm, addr, *pmdp); if (pmd_user_accessible_page(pmd)) { page_table_check_set(mm, addr, pmd_pfn(pmd), PMD_PAGE_SIZE >> PAGE_SHIFT, @@ -250,17 +236,10 @@ EXPORT_SYMBOL(__page_table_check_pmd_set); void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr, pud_t *pudp, pud_t pud) { - pud_t old_pud; - if (&init_mm == mm) return; - old_pud = *pudp; - if (pud_user_accessible_page(old_pud)) { - page_table_check_clear(mm, addr, pud_pfn(old_pud), - PUD_PAGE_SIZE >> PAGE_SHIFT); - } - + __page_table_check_pud_clear(mm, addr, *pudp); if (pud_user_accessible_page(pud)) { page_table_check_set(mm, addr, pud_pfn(pud), PUD_PAGE_SIZE >> PAGE_SHIFT, @@ -268,3 +247,23 @@ void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr, } } EXPORT_SYMBOL(__page_table_check_pud_set); + +void __page_table_check_pte_clear_range(struct mm_struct *mm, + unsigned long addr, + pmd_t pmd) +{ + if (&init_mm == mm) + return; + + if (!pmd_bad(pmd) && !pmd_leaf(pmd)) { + pte_t 
*ptep = pte_offset_map(&pmd, addr); + unsigned long i; + + pte_unmap(ptep); + for (i = 0; i < PTRS_PER_PTE; i++) { + __page_table_check_pte_clear(mm, addr, *ptep); + addr += PAGE_SIZE; + ptep++; + } + } +} diff --git a/mm/percpu.c b/mm/percpu.c index a188ac9a267d25f12f64d422528c6471e32c378d..ea28db283044eb40c84e78632663256dd7a14e9b 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -779,7 +779,7 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index) { struct pcpu_block_md *block = chunk->md_blocks + index; unsigned long *alloc_map = pcpu_index_alloc_map(chunk, index); - unsigned int rs, re, start; /* region start, region end */ + unsigned int start, end; /* region start, region end */ /* promote scan_hint to contig_hint */ if (block->scan_hint) { @@ -795,9 +795,8 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index) block->right_free = 0; /* iterate over free areas and update the contig hints */ - bitmap_for_each_clear_region(alloc_map, rs, re, start, - PCPU_BITMAP_BLOCK_BITS) - pcpu_block_update(block, rs, re); + for_each_clear_bitrange_from(start, end, alloc_map, PCPU_BITMAP_BLOCK_BITS) + pcpu_block_update(block, start, end); } /** @@ -1070,17 +1069,18 @@ static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off, static bool pcpu_is_populated(struct pcpu_chunk *chunk, int bit_off, int bits, int *next_off) { - unsigned int page_start, page_end, rs, re; + unsigned int start, end; - page_start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE); - page_end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE); + start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE); + end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE); - rs = page_start; - bitmap_next_clear_region(chunk->populated, &rs, &re, page_end); - if (rs >= page_end) + start = find_next_zero_bit(chunk->populated, end, start); + if (start >= end) return true; - *next_off = re * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE; + end = find_next_bit(chunk->populated, end, start + 1); + + *next_off = end * PAGE_SIZE / PCPU_MIN_ALLOC_SIZE; return false; } @@ -1851,13 +1851,12 @@ area_found: /* populate if not all pages are already there */ if (!is_atomic) { - unsigned int page_start, page_end, rs, re; + unsigned int page_end, rs, re; - page_start = PFN_DOWN(off); + rs = PFN_DOWN(off); page_end = PFN_UP(off + size); - bitmap_for_each_clear_region(chunk->populated, rs, re, - page_start, page_end) { + for_each_clear_bitrange_from(rs, re, chunk->populated, page_end) { WARN_ON(chunk->immutable); ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp); @@ -2013,8 +2012,7 @@ static void pcpu_balance_free(bool empty_only) list_for_each_entry_safe(chunk, next, &to_free, list) { unsigned int rs, re; - bitmap_for_each_set_region(chunk->populated, rs, re, 0, - chunk->nr_pages) { + for_each_set_bitrange(rs, re, chunk->populated, chunk->nr_pages) { pcpu_depopulate_chunk(chunk, rs, re); spin_lock_irq(&pcpu_lock); pcpu_chunk_depopulated(chunk, rs, re); @@ -2084,8 +2082,7 @@ retry_pop: continue; /* @chunk can't go away while pcpu_alloc_mutex is held */ - bitmap_for_each_clear_region(chunk->populated, rs, re, 0, - chunk->nr_pages) { + for_each_clear_bitrange(rs, re, chunk->populated, chunk->nr_pages) { int nr = min_t(int, re - rs, nr_to_pop); spin_unlock_irq(&pcpu_lock); diff --git a/mm/shmem.c b/mm/shmem.c index 66909efd0a1b25bee37b773aecbb550b13c0175c..a09b29ec2b45c906fef4e8fcfc0d88a93ea99718 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include @@ -1152,7 +1151,7 @@ static 
void shmem_evict_inode(struct inode *inode) static int shmem_find_swap_entries(struct address_space *mapping, pgoff_t start, unsigned int nr_entries, struct page **entries, pgoff_t *indices, - unsigned int type, bool frontswap) + unsigned int type) { XA_STATE(xas, &mapping->i_pages, start); struct page *page; @@ -1173,9 +1172,6 @@ static int shmem_find_swap_entries(struct address_space *mapping, entry = radix_to_swp_entry(page); if (swp_type(entry) != type) continue; - if (frontswap && - !frontswap_test(swap_info[type], swp_offset(entry))) - continue; indices[ret] = xas.xa_index; entries[ret] = page; @@ -1228,26 +1224,20 @@ static int shmem_unuse_swap_entries(struct inode *inode, struct pagevec pvec, /* * If swap found in inode, free it and move page from swapcache to filecache. */ -static int shmem_unuse_inode(struct inode *inode, unsigned int type, - bool frontswap, unsigned long *fs_pages_to_unuse) +static int shmem_unuse_inode(struct inode *inode, unsigned int type) { struct address_space *mapping = inode->i_mapping; pgoff_t start = 0; struct pagevec pvec; pgoff_t indices[PAGEVEC_SIZE]; - bool frontswap_partial = (frontswap && *fs_pages_to_unuse > 0); int ret = 0; pagevec_init(&pvec); do { unsigned int nr_entries = PAGEVEC_SIZE; - if (frontswap_partial && *fs_pages_to_unuse < PAGEVEC_SIZE) - nr_entries = *fs_pages_to_unuse; - pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries, - pvec.pages, indices, - type, frontswap); + pvec.pages, indices, type); if (pvec.nr == 0) { ret = 0; break; @@ -1257,14 +1247,6 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type, if (ret < 0) break; - if (frontswap_partial) { - *fs_pages_to_unuse -= ret; - if (*fs_pages_to_unuse == 0) { - ret = FRONTSWAP_PAGES_UNUSED; - break; - } - } - start = indices[pvec.nr - 1]; } while (true); @@ -1276,8 +1258,7 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type, * device 'type' back into memory, so the swap device can be * unused. */ -int shmem_unuse(unsigned int type, bool frontswap, - unsigned long *fs_pages_to_unuse) +int shmem_unuse(unsigned int type) { struct shmem_inode_info *info, *next; int error = 0; @@ -1300,8 +1281,7 @@ int shmem_unuse(unsigned int type, bool frontswap, atomic_inc(&info->stop_eviction); mutex_unlock(&shmem_swaplist_mutex); - error = shmem_unuse_inode(&info->vfs_inode, type, frontswap, - fs_pages_to_unuse); + error = shmem_unuse_inode(&info->vfs_inode, type); cond_resched(); mutex_lock(&shmem_swaplist_mutex); @@ -4015,8 +3995,7 @@ int __init shmem_init(void) return 0; } -int shmem_unuse(unsigned int type, bool frontswap, - unsigned long *fs_pages_to_unuse) +int shmem_unuse(unsigned int type) { return 0; } diff --git a/mm/swapfile.c b/mm/swapfile.c index caa9f81a0d15fa93168bb97f17eab792f30e8eec..bf0df7aa7158fee191a5cb03667be0f598971021 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -49,7 +49,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t, unsigned char); static void free_swap_count_continuations(struct swap_info_struct *); -DEFINE_SPINLOCK(swap_lock); +static DEFINE_SPINLOCK(swap_lock); static unsigned int nr_swapfiles; atomic_long_t nr_swap_pages; /* @@ -71,7 +71,7 @@ static const char Unused_offset[] = "Unused swap offset entry "; * all active swap_info_structs * protected with swap_lock, and ordered by priority. 
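With the frontswap partial-unuse bookkeeping gone, shmem_unuse_inode() above reduces to a plain batched scan: collect up to PAGEVEC_SIZE swap entries, bring them back into the page cache, and resume from the last index seen. A self-contained toy of that batching shape, where a boolean array stands in for the mapping's XArray (all names here are illustrative, not kernel API):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    #define NR_SLOTS 64
    #define BATCH    15		/* PAGEVEC_SIZE in kernels of this era */

    static bool swapped[NR_SLOTS];	/* toy stand-in for the shmem XArray */

    /* Model of shmem_find_swap_entries(): up to max hits from start on. */
    static size_t find_batch(size_t start, size_t *indices, size_t max)
    {
    	size_t n = 0;

    	for (size_t i = start; i < NR_SLOTS && n < max; i++)
    		if (swapped[i])
    			indices[n++] = i;
    	return n;
    }

    static void unuse_all(void)
    {
    	size_t indices[BATCH];
    	size_t start = 0;
    	size_t n;

    	while ((n = find_batch(start, indices, BATCH)) != 0) {
    		for (size_t i = 0; i < n; i++)
    			swapped[indices[i]] = false;	/* "swap in" */
    		start = indices[n - 1];	/* resume after the last hit */
    	}
    }

    int main(void)
    {
    	swapped[3] = swapped[40] = swapped[63] = true;
    	unuse_all();
    	for (size_t i = 0; i < NR_SLOTS; i++)
    		if (swapped[i])
    			printf("slot %zu still swapped\n", i);
    	return 0;
    }

Rescanning from the last processed index is safe because processed entries have been removed from the mapping and will not be found again.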
*/ -PLIST_HEAD(swap_active_head); +static PLIST_HEAD(swap_active_head); /* * all available (active, not full) swap_info_structs @@ -1923,8 +1923,7 @@ out: static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, - unsigned int type, bool frontswap, - unsigned long *fs_pages_to_unuse) + unsigned int type) { struct page *page; swp_entry_t entry; @@ -1945,9 +1944,6 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, continue; offset = swp_offset(entry); - if (frontswap && !frontswap_test(si, offset)) - continue; - pte_unmap(pte); swap_map = &si->swap_map[offset]; page = lookup_swap_cache(entry, vma, addr); @@ -1979,11 +1975,6 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, try_to_free_swap(page); unlock_page(page); put_page(page); - - if (*fs_pages_to_unuse && !--(*fs_pages_to_unuse)) { - ret = FRONTSWAP_PAGES_UNUSED; - goto out; - } try_next: pte = pte_offset_map(pmd, addr); } while (pte++, addr += PAGE_SIZE, addr != end); @@ -1996,8 +1987,7 @@ out: static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, unsigned long addr, unsigned long end, - unsigned int type, bool frontswap, - unsigned long *fs_pages_to_unuse) + unsigned int type) { pmd_t *pmd; unsigned long next; @@ -2009,8 +1999,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, next = pmd_addr_end(addr, end); if (pmd_none_or_trans_huge_or_clear_bad(pmd)) continue; - ret = unuse_pte_range(vma, pmd, addr, next, type, - frontswap, fs_pages_to_unuse); + ret = unuse_pte_range(vma, pmd, addr, next, type); if (ret) return ret; } while (pmd++, addr = next, addr != end); @@ -2019,8 +2008,7 @@ static inline int unuse_pmd_range(struct vm_area_struct *vma, pud_t *pud, static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d, unsigned long addr, unsigned long end, - unsigned int type, bool frontswap, - unsigned long *fs_pages_to_unuse) + unsigned int type) { pud_t *pud; unsigned long next; @@ -2031,8 +2019,7 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d, next = pud_addr_end(addr, end); if (pud_none_or_clear_bad(pud)) continue; - ret = unuse_pmd_range(vma, pud, addr, next, type, - frontswap, fs_pages_to_unuse); + ret = unuse_pmd_range(vma, pud, addr, next, type); if (ret) return ret; } while (pud++, addr = next, addr != end); @@ -2041,8 +2028,7 @@ static inline int unuse_pud_range(struct vm_area_struct *vma, p4d_t *p4d, static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd, unsigned long addr, unsigned long end, - unsigned int type, bool frontswap, - unsigned long *fs_pages_to_unuse) + unsigned int type) { p4d_t *p4d; unsigned long next; @@ -2053,16 +2039,14 @@ static inline int unuse_p4d_range(struct vm_area_struct *vma, pgd_t *pgd, next = p4d_addr_end(addr, end); if (p4d_none_or_clear_bad(p4d)) continue; - ret = unuse_pud_range(vma, p4d, addr, next, type, - frontswap, fs_pages_to_unuse); + ret = unuse_pud_range(vma, p4d, addr, next, type); if (ret) return ret; } while (p4d++, addr = next, addr != end); return 0; } -static int unuse_vma(struct vm_area_struct *vma, unsigned int type, - bool frontswap, unsigned long *fs_pages_to_unuse) +static int unuse_vma(struct vm_area_struct *vma, unsigned int type) { pgd_t *pgd; unsigned long addr, end, next; @@ -2076,16 +2060,14 @@ static int unuse_vma(struct vm_area_struct *vma, unsigned int type, next = pgd_addr_end(addr, end); if (pgd_none_or_clear_bad(pgd)) continue; - ret = unuse_p4d_range(vma, pgd, addr, 
next, type, - frontswap, fs_pages_to_unuse); + ret = unuse_p4d_range(vma, pgd, addr, next, type); if (ret) return ret; } while (pgd++, addr = next, addr != end); return 0; } -static int unuse_mm(struct mm_struct *mm, unsigned int type, - bool frontswap, unsigned long *fs_pages_to_unuse) +static int unuse_mm(struct mm_struct *mm, unsigned int type) { struct vm_area_struct *vma; int ret = 0; @@ -2093,8 +2075,7 @@ static int unuse_mm(struct mm_struct *mm, unsigned int type, mmap_read_lock(mm); for (vma = mm->mmap; vma; vma = vma->vm_next) { if (vma->anon_vma) { - ret = unuse_vma(vma, type, frontswap, - fs_pages_to_unuse); + ret = unuse_vma(vma, type); if (ret) break; } @@ -2110,7 +2091,7 @@ static int unuse_mm(struct mm_struct *mm, unsigned int type, * if there are no inuse entries after prev till end of the map. */ static unsigned int find_next_to_unuse(struct swap_info_struct *si, - unsigned int prev, bool frontswap) + unsigned int prev) { unsigned int i; unsigned char count; @@ -2124,8 +2105,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, for (i = prev + 1; i < si->max; i++) { count = READ_ONCE(si->swap_map[i]); if (count && swap_count(count) != SWAP_MAP_BAD) - if (!frontswap || frontswap_test(si, i)) - break; + break; if ((i % LATENCY_LIMIT) == 0) cond_resched(); } @@ -2136,12 +2116,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, return i; } -/* - * If the boolean frontswap is true, only unuse pages_to_unuse pages; - * pages_to_unuse==0 means all pages; ignored if frontswap is false - */ -int try_to_unuse(unsigned int type, bool frontswap, - unsigned long pages_to_unuse) +static int try_to_unuse(unsigned int type) { struct mm_struct *prev_mm; struct mm_struct *mm; @@ -2155,13 +2130,10 @@ int try_to_unuse(unsigned int type, bool frontswap, if (!READ_ONCE(si->inuse_pages)) return 0; - if (!frontswap) - pages_to_unuse = 0; - retry: - retval = shmem_unuse(type, frontswap, &pages_to_unuse); + retval = shmem_unuse(type); if (retval) - goto out; + return retval; prev_mm = &init_mm; mmget(prev_mm); @@ -2178,11 +2150,10 @@ retry: spin_unlock(&mmlist_lock); mmput(prev_mm); prev_mm = mm; - retval = unuse_mm(mm, type, frontswap, &pages_to_unuse); - + retval = unuse_mm(mm, type); if (retval) { mmput(prev_mm); - goto out; + return retval; } /* @@ -2199,7 +2170,7 @@ retry: i = 0; while (READ_ONCE(si->inuse_pages) && !signal_pending(current) && - (i = find_next_to_unuse(si, i, frontswap)) != 0) { + (i = find_next_to_unuse(si, i)) != 0) { entry = swp_entry(type, i); page = find_get_page(swap_address_space(entry), i); @@ -2217,14 +2188,6 @@ retry: try_to_free_swap(page); unlock_page(page); put_page(page); - - /* - * For frontswap, we just need to unuse pages_to_unuse, if - * it was specified. Need not check frontswap again here as - * we already zeroed out pages_to_unuse if not frontswap. - */ - if (pages_to_unuse && --pages_to_unuse == 0) - goto out; } /* @@ -2242,10 +2205,10 @@ retry: if (READ_ONCE(si->inuse_pages)) { if (!signal_pending(current)) goto retry; - retval = -EINTR; + return -EINTR; } -out: - return (retval == FRONTSWAP_PAGES_UNUSED) ? 
0 : retval; + + return 0; } /* @@ -2463,7 +2426,8 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, struct swap_cluster_info *cluster_info, unsigned long *frontswap_map) { - frontswap_init(p->type, frontswap_map); + if (IS_ENABLED(CONFIG_FRONTSWAP)) + frontswap_init(p->type, frontswap_map); spin_lock(&swap_lock); spin_lock(&p->lock); setup_swap_info(p, prio, swap_map, cluster_info); @@ -2576,7 +2540,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) disable_swap_slots_cache_lock(); set_current_oom_origin(); - err = try_to_unuse(p->type, false, 0); /* force unuse all pages */ + err = try_to_unuse(p->type); clear_current_oom_origin(); if (err) { diff --git a/mm/truncate.c b/mm/truncate.c index 5e243d7269c0f9d2e70625b70ecca9c4ae4f439c..9dbf0b75da5defbada187e9b7fe5215a69031ff4 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -22,7 +22,6 @@ #include /* grr. try_to_release_page, do_invalidatepage */ #include -#include #include #include "internal.h" @@ -264,7 +263,6 @@ bool truncate_inode_partial_folio(struct folio *folio, loff_t start, loff_t end) */ folio_zero_range(folio, offset, length); - cleancache_invalidate_page(folio->mapping, &folio->page); if (folio_has_private(folio)) do_invalidatepage(&folio->page, offset, length); if (!folio_test_large(folio)) @@ -351,7 +349,7 @@ void truncate_inode_pages_range(struct address_space *mapping, bool same_folio; if (mapping_empty(mapping)) - goto out; + return; /* * 'start' and 'end' always covers the range of pages to be fully @@ -442,9 +440,6 @@ void truncate_inode_pages_range(struct address_space *mapping, folio_batch_release(&fbatch); index++; } - -out: - cleancache_invalidate_inode(mapping); } EXPORT_SYMBOL(truncate_inode_pages_range); @@ -498,10 +493,6 @@ void truncate_inode_pages_final(struct address_space *mapping) xa_unlock_irq(&mapping->i_pages); } - /* - * Cleancache needs notification even if there are no pages or shadow - * entries. - */ truncate_inode_pages(mapping, 0); } EXPORT_SYMBOL(truncate_inode_pages_final); @@ -661,7 +652,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, int did_range_unmap = 0; if (mapping_empty(mapping)) - goto out; + return 0; folio_batch_init(&fbatch); index = start; @@ -725,8 +716,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping, if (dax_mapping(mapping)) { unmap_mapping_pages(mapping, start, end - start + 1, false); } -out: - cleancache_invalidate_inode(mapping); return ret; } EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); diff --git a/mm/vmscan.c b/mm/vmscan.c index 090bfb605ecf077ca329fbdd454ebeef720c2b87..59b14e0d696c9578fd4dc4ddb46a893f98aec15f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1066,8 +1066,10 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason) * forward progress (e.g. journalling workqueues or kthreads). */ if (!current_is_kswapd() && - current->flags & (PF_IO_WORKER|PF_KTHREAD)) + current->flags & (PF_IO_WORKER|PF_KTHREAD)) { + cond_resched(); return; + } /* * These figures are pulled out of thin air. 
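After losing its frontswap arguments, try_to_unuse() above has one clear shape: drain shmem, drain every mm on the mmlist, drain the remaining swap-cache entries, and retry from the top while the device still shows in-use pages and no signal is pending. A sketch of just that control flow, with the real work stubbed out (stub names are invented; only the loop structure mirrors the function):

    #include <errno.h>
    #include <stdbool.h>

    /* Stand-ins for the real work; each returns 0 on success. */
    static int drain_shmem(void)     { return 0; }
    static int drain_mms(void)       { return 0; }
    static int drain_swapcache(void) { return 0; }
    static bool still_in_use(void)   { return false; } /* si->inuse_pages */
    static bool signal_is_pending(void) { return false; }

    static int try_to_unuse_model(void)
    {
    	int err;
    retry:
    	err = drain_shmem();
    	if (err)
    		return err;
    	err = drain_mms();
    	if (err)
    		return err;
    	err = drain_swapcache();
    	if (err)
    		return err;
    	/* Entries can reappear behind us (concurrent swap activity),
    	 * so rescan from scratch until the count truly reaches zero. */
    	if (still_in_use()) {
    		if (!signal_is_pending())
    			goto retry;
    		return -EINTR;
    	}
    	return 0;
    }

The explicit early returns replace the old shared out: label, which existed only to translate FRONTSWAP_PAGES_UNUSED into success.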
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 0d3b65939016c38b7d77fd79b2d8180a65d33a9a..9152fbde33b50f93a4f8ef949caebd6d44c5ffcf 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -30,6 +30,14 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +/* + * lock ordering: + * page_lock + * pool->migrate_lock + * class->lock + * zspage->lock + */ + #include #include #include @@ -57,6 +65,7 @@ #include #include #include +#include #define ZSPAGE_MAGIC 0x58 @@ -100,15 +109,6 @@ #define _PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT) -/* - * Memory for allocating for handle keeps object position by - * encoding and the encoded value has a room - * in least bit(ie, look at obj_to_location). - * We use the bit to synchronize between object access by - * user and migration. - */ -#define HANDLE_PIN_BIT 0 - /* * Head in allocated object should have OBJ_ALLOCATED_TAG * to identify the object was allocated or not. @@ -121,6 +121,7 @@ #define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS - OBJ_TAG_BITS) #define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1) +#define HUGE_BITS 1 #define FULLNESS_BITS 2 #define CLASS_BITS 8 #define ISOLATED_BITS 3 @@ -158,7 +159,7 @@ enum fullness_group { NR_ZS_FULLNESS, }; -enum zs_stat_type { +enum class_stat_type { CLASS_EMPTY, CLASS_ALMOST_EMPTY, CLASS_ALMOST_FULL, @@ -213,22 +214,6 @@ struct size_class { struct zs_size_stat stats; }; -/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */ -static void SetPageHugeObject(struct page *page) -{ - SetPageOwnerPriv1(page); -} - -static void ClearPageHugeObject(struct page *page) -{ - ClearPageOwnerPriv1(page); -} - -static int PageHugeObject(struct page *page) -{ - return PageOwnerPriv1(page); -} - /* * Placed within free objects to form a singly linked list. * For every zspage, zspage->freeobj gives head of this list. 
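The lock-ordering comment added at the top of mm/zsmalloc.c above is the contract the rest of this patch implements: page_lock, then pool->migrate_lock, then class->lock, then zspage->lock, always acquired in that order. A small pthread sketch of paths obeying such a hierarchy (levels and lock kinds here mirror the comment but are otherwise illustrative):

    #include <pthread.h>

    /* Always acquire a lower level before a higher one, mirroring
     * migrate_lock -> class lock -> zspage lock. */
    static pthread_rwlock_t migrate_lock = PTHREAD_RWLOCK_INITIALIZER; /* 1 */
    static pthread_mutex_t  class_lock   = PTHREAD_MUTEX_INITIALIZER;  /* 2 */
    static pthread_rwlock_t zspage_lock  = PTHREAD_RWLOCK_INITIALIZER; /* 3 */

    static void free_path(void)
    {
    	/* zs_free(): reader on the pool lock, then the class lock. */
    	pthread_rwlock_rdlock(&migrate_lock);
    	pthread_mutex_lock(&class_lock);
    	pthread_rwlock_unlock(&migrate_lock); /* drop early, order kept */
    	/* ... free the object ... */
    	pthread_mutex_unlock(&class_lock);
    }

    static void migrate_path(void)
    {
    	/* zs_page_migrate(): writer on every level, same order. */
    	pthread_rwlock_wrlock(&migrate_lock);
    	pthread_mutex_lock(&class_lock);
    	pthread_rwlock_wrlock(&zspage_lock);
    	/* ... move the payload and rewrite handles ... */
    	pthread_rwlock_unlock(&zspage_lock);
    	pthread_mutex_unlock(&class_lock);
    	pthread_rwlock_unlock(&migrate_lock);
    }

Releasing out of order (as free_path does with migrate_lock) is fine; only the acquisition order matters for deadlock freedom.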
@@ -269,15 +254,14 @@ struct zs_pool { #ifdef CONFIG_COMPACTION struct inode *inode; struct work_struct free_work; - /* A wait queue for when migration races with async_free_zspage() */ - struct wait_queue_head migration_wait; - atomic_long_t isolated_pages; - bool destroying; #endif + /* protect page/zspage migration */ + rwlock_t migrate_lock; }; struct zspage { struct { + unsigned int huge:HUGE_BITS; unsigned int fullness:FULLNESS_BITS; unsigned int class:CLASS_BITS + 1; unsigned int isolated:ISOLATED_BITS; @@ -293,17 +277,32 @@ struct zspage { }; struct mapping_area { + local_lock_t lock; char *vm_buf; /* copy buffer for objects that span pages */ char *vm_addr; /* address of kmap_atomic()'ed pages */ enum zs_mapmode vm_mm; /* mapping mode */ }; +/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */ +static void SetZsHugePage(struct zspage *zspage) +{ + zspage->huge = 1; +} + +static bool ZsHugePage(struct zspage *zspage) +{ + return zspage->huge; +} + #ifdef CONFIG_COMPACTION static int zs_register_migration(struct zs_pool *pool); static void zs_unregister_migration(struct zs_pool *pool); static void migrate_lock_init(struct zspage *zspage); static void migrate_read_lock(struct zspage *zspage); static void migrate_read_unlock(struct zspage *zspage); +static void migrate_write_lock(struct zspage *zspage); +static void migrate_write_lock_nested(struct zspage *zspage); +static void migrate_write_unlock(struct zspage *zspage); static void kick_deferred_free(struct zs_pool *pool); static void init_deferred_free(struct zs_pool *pool); static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage); @@ -315,6 +314,9 @@ static void zs_unregister_migration(struct zs_pool *pool) {} static void migrate_lock_init(struct zspage *zspage) {} static void migrate_read_lock(struct zspage *zspage) {} static void migrate_read_unlock(struct zspage *zspage) {} +static void migrate_write_lock(struct zspage *zspage) {} +static void migrate_write_lock_nested(struct zspage *zspage) {} +static void migrate_write_unlock(struct zspage *zspage) {} static void kick_deferred_free(struct zs_pool *pool) {} static void init_deferred_free(struct zs_pool *pool) {} static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage) {} @@ -366,14 +368,10 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage) kmem_cache_free(pool->zspage_cachep, zspage); } +/* class->lock(which owns the handle) synchronizes races */ static void record_obj(unsigned long handle, unsigned long obj) { - /* - * lsb of @obj represents handle lock while other bits - * represent object value the handle is pointing so - * updating shouldn't do store tearing. 
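The mapping_area above gains an embedded local_lock_t, replacing the old get_cpu_var()/put_cpu_var() pair: on non-RT kernels a local_lock just pins the task to its CPU's data, while on PREEMPT_RT it is a real lock, so the per-CPU copy buffer stays consistent either way. There is no exact userland twin of a per-CPU buffer, so a rough analogue keeps the scratch area per thread instead (assumption: thread locality plays the role CPU locality plays in the kernel):

    #include <stddef.h>
    #include <string.h>

    #define SCRATCH_SIZE 4096

    /* Per-thread stand-in for the per-CPU zs_map_area buffer. */
    static _Thread_local char vm_buf[SCRATCH_SIZE];

    /* Stitch together an object that straddles two "pages", the job
     * __zs_map_object() does for crossing objects (size <= SCRATCH_SIZE,
     * off < page_size assumed). */
    static void *map_crossing(const char *page0, size_t off,
    			  const char *page1, size_t size, size_t page_size)
    {
    	size_t first = page_size - off;	/* bytes left in the first page */

    	memcpy(vm_buf, page0 + off, first);
    	memcpy(vm_buf + first, page1, size - first);
    	return vm_buf;
    }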
- */ - WRITE_ONCE(*(unsigned long *)handle, obj); + *(unsigned long *)handle = obj; } /* zpool driver */ @@ -455,12 +453,9 @@ MODULE_ALIAS("zpool-zsmalloc"); #endif /* CONFIG_ZPOOL */ /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ -static DEFINE_PER_CPU(struct mapping_area, zs_map_area); - -static bool is_zspage_isolated(struct zspage *zspage) -{ - return zspage->isolated; -} +static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = { + .lock = INIT_LOCAL_LOCK(lock), +}; static __maybe_unused int is_first_page(struct page *page) { @@ -517,6 +512,12 @@ static void get_zspage_mapping(struct zspage *zspage, *class_idx = zspage->class; } +static struct size_class *zspage_class(struct zs_pool *pool, + struct zspage *zspage) +{ + return pool->size_class[zspage->class]; +} + static void set_zspage_mapping(struct zspage *zspage, unsigned int class_idx, enum fullness_group fullness) @@ -543,21 +544,21 @@ static int get_size_class_index(int size) return min_t(int, ZS_SIZE_CLASSES - 1, idx); } -/* type can be of enum type zs_stat_type or fullness_group */ -static inline void zs_stat_inc(struct size_class *class, +/* type can be of enum type class_stat_type or fullness_group */ +static inline void class_stat_inc(struct size_class *class, int type, unsigned long cnt) { class->stats.objs[type] += cnt; } -/* type can be of enum type zs_stat_type or fullness_group */ -static inline void zs_stat_dec(struct size_class *class, +/* type can be of enum type class_stat_type or fullness_group */ +static inline void class_stat_dec(struct size_class *class, int type, unsigned long cnt) { class->stats.objs[type] -= cnt; } -/* type can be of enum type zs_stat_type or fullness_group */ +/* type can be of enum type class_stat_type or fullness_group */ static inline unsigned long zs_stat_get(struct size_class *class, int type) { @@ -719,7 +720,7 @@ static void insert_zspage(struct size_class *class, { struct zspage *head; - zs_stat_inc(class, fullness, 1); + class_stat_inc(class, fullness, 1); head = list_first_entry_or_null(&class->fullness_list[fullness], struct zspage, list); /* @@ -741,10 +742,9 @@ static void remove_zspage(struct size_class *class, enum fullness_group fullness) { VM_BUG_ON(list_empty(&class->fullness_list[fullness])); - VM_BUG_ON(is_zspage_isolated(zspage)); list_del_init(&zspage->list); - zs_stat_dec(class, fullness, 1); + class_stat_dec(class, fullness, 1); } /* @@ -767,13 +767,9 @@ static enum fullness_group fix_fullness_group(struct size_class *class, if (newfg == currfg) goto out; - if (!is_zspage_isolated(zspage)) { - remove_zspage(class, zspage, currfg); - insert_zspage(class, zspage, newfg); - } - + remove_zspage(class, zspage, currfg); + insert_zspage(class, zspage, newfg); set_zspage_mapping(zspage, class_idx, newfg); - out: return newfg; } @@ -824,7 +820,9 @@ static struct zspage *get_zspage(struct page *page) static struct page *get_next_page(struct page *page) { - if (unlikely(PageHugeObject(page))) + struct zspage *zspage = get_zspage(page); + + if (unlikely(ZsHugePage(zspage))) return NULL; return (struct page *)page->index; @@ -844,6 +842,12 @@ static void obj_to_location(unsigned long obj, struct page **page, *obj_idx = (obj & OBJ_INDEX_MASK); } +static void obj_to_page(unsigned long obj, struct page **page) +{ + obj >>= OBJ_TAG_BITS; + *page = pfn_to_page(obj >> OBJ_INDEX_BITS); +} + /** * location_to_obj - get obj value encoded from (, ) * @page: page object resides in zspage @@ -865,33 +869,22 @@ static unsigned long handle_to_obj(unsigned long 
handle) return *(unsigned long *)handle; } -static unsigned long obj_to_head(struct page *page, void *obj) +static bool obj_allocated(struct page *page, void *obj, unsigned long *phandle) { - if (unlikely(PageHugeObject(page))) { + unsigned long handle; + struct zspage *zspage = get_zspage(page); + + if (unlikely(ZsHugePage(zspage))) { VM_BUG_ON_PAGE(!is_first_page(page), page); - return page->index; + handle = page->index; } else - return *(unsigned long *)obj; -} - -static inline int testpin_tag(unsigned long handle) -{ - return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle); -} - -static inline int trypin_tag(unsigned long handle) -{ - return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle); -} + handle = *(unsigned long *)obj; -static void pin_tag(unsigned long handle) __acquires(bitlock) -{ - bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle); -} + if (!(handle & OBJ_ALLOCATED_TAG)) + return false; -static void unpin_tag(unsigned long handle) __releases(bitlock) -{ - bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle); + *phandle = handle & ~OBJ_ALLOCATED_TAG; + return true; } static void reset_page(struct page *page) @@ -900,7 +893,6 @@ static void reset_page(struct page *page) ClearPagePrivate(page); set_page_private(page, 0); page_mapcount_reset(page); - ClearPageHugeObject(page); page->index = 0; } @@ -952,7 +944,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class, cache_free_zspage(pool, zspage); - zs_stat_dec(class, OBJ_ALLOCATED, class->objs_per_zspage); + class_stat_dec(class, OBJ_ALLOCATED, class->objs_per_zspage); atomic_long_sub(class->pages_per_zspage, &pool->pages_allocated); } @@ -963,6 +955,11 @@ static void free_zspage(struct zs_pool *pool, struct size_class *class, VM_BUG_ON(get_zspage_inuse(zspage)); VM_BUG_ON(list_empty(&zspage->list)); + /* + * Since zs_free couldn't be sleepable, this function cannot call + * lock_page. The page locks trylock_zspage got will be released + * by __free_zspage. + */ if (!trylock_zspage(zspage)) { kick_deferred_free(pool); return; @@ -1042,7 +1039,7 @@ static void create_page_chain(struct size_class *class, struct zspage *zspage, SetPagePrivate(page); if (unlikely(class->objs_per_zspage == 1 && class->pages_per_zspage == 1)) - SetPageHugeObject(page); + SetZsHugePage(zspage); } else { prev_page->index = (unsigned long)page; } @@ -1246,8 +1243,6 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, unsigned long obj, off; unsigned int obj_idx; - unsigned int class_idx; - enum fullness_group fg; struct size_class *class; struct mapping_area *area; struct page *pages[2]; @@ -1260,21 +1255,26 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, */ BUG_ON(in_interrupt()); - /* From now on, migration cannot move the object */ - pin_tag(handle); - + /* It guarantees it can get zspage from handle safely */ + read_lock(&pool->migrate_lock); obj = handle_to_obj(handle); obj_to_location(obj, &page, &obj_idx); zspage = get_zspage(page); - /* migration cannot move any subpage in this zspage */ + /* + * migration cannot move any zpages in this zspage. Here, class->lock + * is too heavy since callers would take some time until they calls + * zs_unmap_object API so delegate the locking from class to zspage + * which is smaller granularity. 
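The comment being added to zs_map_object() here describes a classic lock handoff: the coarse pool->migrate_lock is held only long enough to resolve the handle to a stable zspage, the fine-grained per-zspage reader lock is taken, and then the coarse lock is dropped for the potentially long mapping window. A compilable pthread sketch of that structure (the object table and names are invented; lock initialisation is shown explicitly):

    #include <pthread.h>

    struct object {
    	pthread_rwlock_t lock;		/* fine-grained, per object */
    	int payload;
    };

    static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER;
    static struct object table[16];	/* toy handle space */

    static void table_init(void)
    {
    	for (int i = 0; i < 16; i++)
    		pthread_rwlock_init(&table[i].lock, NULL);
    }

    static struct object *pin_for_access(unsigned long handle)
    {
    	struct object *obj;

    	pthread_rwlock_rdlock(&table_lock);  /* handle -> object is stable */
    	obj = &table[handle % 16];
    	pthread_rwlock_rdlock(&obj->lock);   /* blocks relocation of obj */
    	pthread_rwlock_unlock(&table_lock);  /* the long window holds only
    					      * the small lock */
    	return obj;	/* caller drops obj->lock when finished */
    }

Handing off to the smaller lock is what lets callers sit between zs_map_object() and zs_unmap_object() without stalling every other class in the pool.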
+ */ migrate_read_lock(zspage); + read_unlock(&pool->migrate_lock); - get_zspage_mapping(zspage, &class_idx, &fg); - class = pool->size_class[class_idx]; + class = zspage_class(pool, zspage); off = (class->size * obj_idx) & ~PAGE_MASK; - area = &get_cpu_var(zs_map_area); + local_lock(&zs_map_area.lock); + area = this_cpu_ptr(&zs_map_area); area->vm_mm = mm; if (off + class->size <= PAGE_SIZE) { /* this object is contained entirely within a page */ @@ -1290,7 +1290,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, ret = __zs_map_object(area, pages, off, class->size); out: - if (likely(!PageHugeObject(page))) + if (likely(!ZsHugePage(zspage))) ret += ZS_HANDLE_SIZE; return ret; @@ -1304,16 +1304,13 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) unsigned long obj, off; unsigned int obj_idx; - unsigned int class_idx; - enum fullness_group fg; struct size_class *class; struct mapping_area *area; obj = handle_to_obj(handle); obj_to_location(obj, &page, &obj_idx); zspage = get_zspage(page); - get_zspage_mapping(zspage, &class_idx, &fg); - class = pool->size_class[class_idx]; + class = zspage_class(pool, zspage); off = (class->size * obj_idx) & ~PAGE_MASK; area = this_cpu_ptr(&zs_map_area); @@ -1328,10 +1325,9 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) __zs_unmap_object(area, pages, off, class->size); } - put_cpu_var(zs_map_area); + local_unlock(&zs_map_area.lock); migrate_read_unlock(zspage); - unpin_tag(handle); } EXPORT_SYMBOL_GPL(zs_unmap_object); @@ -1354,17 +1350,19 @@ size_t zs_huge_class_size(struct zs_pool *pool) } EXPORT_SYMBOL_GPL(zs_huge_class_size); -static unsigned long obj_malloc(struct size_class *class, +static unsigned long obj_malloc(struct zs_pool *pool, struct zspage *zspage, unsigned long handle) { int i, nr_page, offset; unsigned long obj; struct link_free *link; + struct size_class *class; struct page *m_page; unsigned long m_offset; void *vaddr; + class = pool->size_class[zspage->class]; handle |= OBJ_ALLOCATED_TAG; obj = get_freeobj(zspage); @@ -1379,7 +1377,7 @@ static unsigned long obj_malloc(struct size_class *class, vaddr = kmap_atomic(m_page); link = (struct link_free *)vaddr + m_offset / sizeof(*link); set_freeobj(zspage, link->next >> OBJ_TAG_BITS); - if (likely(!PageHugeObject(m_page))) + if (likely(!ZsHugePage(zspage))) /* record handle in the header of allocated chunk */ link->handle = handle; else @@ -1388,7 +1386,6 @@ static unsigned long obj_malloc(struct size_class *class, kunmap_atomic(vaddr); mod_zspage_inuse(zspage, 1); - zs_stat_inc(class, OBJ_USED, 1); obj = location_to_obj(m_page, obj); @@ -1424,13 +1421,15 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) size += ZS_HANDLE_SIZE; class = pool->size_class[get_size_class_index(size)]; + /* class->lock effectively protects the zpage migration */ spin_lock(&class->lock); zspage = find_get_zspage(class); if (likely(zspage)) { - obj = obj_malloc(class, zspage, handle); + obj = obj_malloc(pool, zspage, handle); /* Now move the zspage to another fullness group, if required */ fix_fullness_group(class, zspage); record_obj(handle, obj); + class_stat_inc(class, OBJ_USED, 1); spin_unlock(&class->lock); return handle; @@ -1445,14 +1444,15 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) } spin_lock(&class->lock); - obj = obj_malloc(class, zspage, handle); + obj = obj_malloc(pool, zspage, handle); newfg = get_fullness_group(class, zspage); insert_zspage(class, zspage, newfg); set_zspage_mapping(zspage, 
class->index, newfg); record_obj(handle, obj); atomic_long_add(class->pages_per_zspage, &pool->pages_allocated); - zs_stat_inc(class, OBJ_ALLOCATED, class->objs_per_zspage); + class_stat_inc(class, OBJ_ALLOCATED, class->objs_per_zspage); + class_stat_inc(class, OBJ_USED, 1); /* We completely set up zspage so mark them as movable */ SetZsPageMovable(pool, zspage); @@ -1462,7 +1462,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp) } EXPORT_SYMBOL_GPL(zs_malloc); -static void obj_free(struct size_class *class, unsigned long obj) +static void obj_free(int class_size, unsigned long obj) { struct link_free *link; struct zspage *zspage; @@ -1472,18 +1472,20 @@ static void obj_free(struct size_class *class, unsigned long obj) void *vaddr; obj_to_location(obj, &f_page, &f_objidx); - f_offset = (class->size * f_objidx) & ~PAGE_MASK; + f_offset = (class_size * f_objidx) & ~PAGE_MASK; zspage = get_zspage(f_page); vaddr = kmap_atomic(f_page); /* Insert this object in containing zspage's freelist */ link = (struct link_free *)(vaddr + f_offset); - link->next = get_freeobj(zspage) << OBJ_TAG_BITS; + if (likely(!ZsHugePage(zspage))) + link->next = get_freeobj(zspage) << OBJ_TAG_BITS; + else + f_page->index = 0; kunmap_atomic(vaddr); set_freeobj(zspage, f_objidx); mod_zspage_inuse(zspage, -1); - zs_stat_dec(class, OBJ_USED, 1); } void zs_free(struct zs_pool *pool, unsigned long handle) @@ -1491,42 +1493,33 @@ void zs_free(struct zs_pool *pool, unsigned long handle) struct zspage *zspage; struct page *f_page; unsigned long obj; - unsigned int f_objidx; - int class_idx; struct size_class *class; enum fullness_group fullness; - bool isolated; if (unlikely(!handle)) return; - pin_tag(handle); + /* + * The pool->migrate_lock protects the race with zpage's migration + * so it's safe to get the page from handle. 
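The obj_to_page() helper introduced earlier, and used by zs_free() just below, works because of how zsmalloc packs an object reference: the PFN and the in-page index share one word, shifted past the tag bit. A freestanding model of that encoding with made-up field widths (the kernel derives OBJ_INDEX_BITS from _PFN_BITS):

    #include <assert.h>
    #include <stdint.h>

    #define TAG_BITS   1	/* room for OBJ_ALLOCATED_TAG */
    #define INDEX_BITS 10	/* toy width */
    #define INDEX_MASK ((1ul << INDEX_BITS) - 1)

    static unsigned long encode(uint64_t pfn, unsigned int idx)
    {
    	return ((pfn << INDEX_BITS) | (idx & INDEX_MASK)) << TAG_BITS;
    }

    static void decode_location(unsigned long obj, uint64_t *pfn,
    			    unsigned int *idx)
    {
    	obj >>= TAG_BITS;		/* obj_to_location() */
    	*pfn = obj >> INDEX_BITS;
    	*idx = obj & INDEX_MASK;
    }

    static uint64_t decode_page(unsigned long obj)
    {
    	obj >>= TAG_BITS;		/* obj_to_page(): page only; the */
    	return obj >> INDEX_BITS;	/* index is not needed to free */
    }

    int main(void)
    {
    	uint64_t pfn;
    	unsigned int idx;

    	decode_location(encode(0x1234, 7), &pfn, &idx);
    	assert(pfn == 0x1234 && idx == 7);
    	assert(decode_page(encode(0x1234, 7)) == 0x1234);
    	return 0;
    }

zs_free() only needs the page to find the zspage and its class, so decoding the index is deferred to obj_free().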
+ */ + read_lock(&pool->migrate_lock); obj = handle_to_obj(handle); - obj_to_location(obj, &f_page, &f_objidx); + obj_to_page(obj, &f_page); zspage = get_zspage(f_page); - - migrate_read_lock(zspage); - - get_zspage_mapping(zspage, &class_idx, &fullness); - class = pool->size_class[class_idx]; - + class = zspage_class(pool, zspage); spin_lock(&class->lock); - obj_free(class, obj); + read_unlock(&pool->migrate_lock); + + obj_free(class->size, obj); + class_stat_dec(class, OBJ_USED, 1); fullness = fix_fullness_group(class, zspage); - if (fullness != ZS_EMPTY) { - migrate_read_unlock(zspage); + if (fullness != ZS_EMPTY) goto out; - } - isolated = is_zspage_isolated(zspage); - migrate_read_unlock(zspage); - /* If zspage is isolated, zs_page_putback will free the zspage */ - if (likely(!isolated)) - free_zspage(pool, class, zspage); + free_zspage(pool, class, zspage); out: - spin_unlock(&class->lock); - unpin_tag(handle); cache_free_handle(pool, handle); } EXPORT_SYMBOL_GPL(zs_free); @@ -1601,7 +1594,6 @@ static void zs_object_copy(struct size_class *class, unsigned long dst, static unsigned long find_alloced_obj(struct size_class *class, struct page *page, int *obj_idx) { - unsigned long head; int offset = 0; int index = *obj_idx; unsigned long handle = 0; @@ -1611,13 +1603,8 @@ static unsigned long find_alloced_obj(struct size_class *class, offset += class->size * index; while (offset < PAGE_SIZE) { - head = obj_to_head(page, addr + offset); - if (head & OBJ_ALLOCATED_TAG) { - handle = head & ~OBJ_ALLOCATED_TAG; - if (trypin_tag(handle)) - break; - handle = 0; - } + if (obj_allocated(page, addr + offset, &handle)) + break; offset += class->size; index++; @@ -1663,25 +1650,16 @@ static int migrate_zspage(struct zs_pool *pool, struct size_class *class, /* Stop if there is no more space */ if (zspage_full(class, get_zspage(d_page))) { - unpin_tag(handle); ret = -ENOMEM; break; } used_obj = handle_to_obj(handle); - free_obj = obj_malloc(class, get_zspage(d_page), handle); + free_obj = obj_malloc(pool, get_zspage(d_page), handle); zs_object_copy(class, free_obj, used_obj); obj_idx++; - /* - * record_obj updates handle's value to free_obj and it will - * invalidate lock bit(ie, HANDLE_PIN_BIT) of handle, which - * breaks synchronization using pin_tag(e,g, zs_free) so - * let's keep the lock bit. 
- */ - free_obj |= BIT(HANDLE_PIN_BIT); record_obj(handle, free_obj); - unpin_tag(handle); - obj_free(class, used_obj); + obj_free(class->size, used_obj); } /* Remember last position in this iteration */ @@ -1706,7 +1684,6 @@ static struct zspage *isolate_zspage(struct size_class *class, bool source) zspage = list_first_entry_or_null(&class->fullness_list[fg[i]], struct zspage, list); if (zspage) { - VM_BUG_ON(is_zspage_isolated(zspage)); remove_zspage(class, zspage, fg[i]); return zspage; } @@ -1727,8 +1704,6 @@ static enum fullness_group putback_zspage(struct size_class *class, { enum fullness_group fullness; - VM_BUG_ON(is_zspage_isolated(zspage)); - fullness = get_fullness_group(class, zspage); insert_zspage(class, zspage, fullness); set_zspage_mapping(zspage, class->index, fullness); @@ -1797,6 +1772,11 @@ static void migrate_write_lock(struct zspage *zspage) write_lock(&zspage->lock); } +static void migrate_write_lock_nested(struct zspage *zspage) +{ + write_lock_nested(&zspage->lock, SINGLE_DEPTH_NESTING); +} + static void migrate_write_unlock(struct zspage *zspage) { write_unlock(&zspage->lock); @@ -1810,35 +1790,10 @@ static void inc_zspage_isolation(struct zspage *zspage) static void dec_zspage_isolation(struct zspage *zspage) { + VM_BUG_ON(zspage->isolated == 0); zspage->isolated--; } -static void putback_zspage_deferred(struct zs_pool *pool, - struct size_class *class, - struct zspage *zspage) -{ - enum fullness_group fg; - - fg = putback_zspage(class, zspage); - if (fg == ZS_EMPTY) - schedule_work(&pool->free_work); - -} - -static inline void zs_pool_dec_isolated(struct zs_pool *pool) -{ - VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0); - atomic_long_dec(&pool->isolated_pages); - /* - * Checking pool->destroying must happen after atomic_long_dec() - * for pool->isolated_pages above. Paired with the smp_mb() in - * zs_unregister_migration(). - */ - smp_mb__after_atomic(); - if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying) - wake_up_all(&pool->migration_wait); -} - static void replace_sub_page(struct size_class *class, struct zspage *zspage, struct page *newpage, struct page *oldpage) { @@ -1857,19 +1812,14 @@ static void replace_sub_page(struct size_class *class, struct zspage *zspage, create_page_chain(class, zspage, pages); set_first_obj_offset(newpage, get_first_obj_offset(oldpage)); - if (unlikely(PageHugeObject(oldpage))) + if (unlikely(ZsHugePage(zspage))) newpage->index = oldpage->index; __SetPageMovable(newpage, page_mapping(oldpage)); } static bool zs_page_isolate(struct page *page, isolate_mode_t mode) { - struct zs_pool *pool; - struct size_class *class; - int class_idx; - enum fullness_group fullness; struct zspage *zspage; - struct address_space *mapping; /* * Page is locked so zspage couldn't be destroyed. For detail, look at @@ -1879,41 +1829,9 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode) VM_BUG_ON_PAGE(PageIsolated(page), page); zspage = get_zspage(page); - - /* - * Without class lock, fullness could be stale while class_idx is okay - * because class_idx is constant unless page is freed so we should get - * fullness again under class lock. 
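The migrate_write_lock_nested() added above exists for compaction further down, which holds the source zspage's writer lock while taking the destination's; write_lock_nested() with SINGLE_DEPTH_NESTING only tells lockdep the double acquisition is intentional, it does not change runtime behaviour. Userspace has no lockdep, so the conventional equivalent is to take same-class locks in a fixed (address) order, which rules out ABBA deadlocks by construction (sketch; pointer ordering across unrelated objects is the usual pragmatic convention):

    #include <pthread.h>

    /* Take two same-class locks deadlock-free by normalising the order. */
    static void lock_pair(pthread_rwlock_t *a, pthread_rwlock_t *b)
    {
    	if (a > b) {
    		pthread_rwlock_t *t = a;
    		a = b;
    		b = t;
    	}
    	pthread_rwlock_wrlock(a);
    	pthread_rwlock_wrlock(b);	/* the "nested" second acquisition */
    }

In __zs_compact() the pair is always (source, destination) chosen under class->lock, so the acquisition order is already serialized; the _nested variant merely keeps the lock validator quiet.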
- */ - get_zspage_mapping(zspage, &class_idx, &fullness); - mapping = page_mapping(page); - pool = mapping->private_data; - class = pool->size_class[class_idx]; - - spin_lock(&class->lock); - if (get_zspage_inuse(zspage) == 0) { - spin_unlock(&class->lock); - return false; - } - - /* zspage is isolated for object migration */ - if (list_empty(&zspage->list) && !is_zspage_isolated(zspage)) { - spin_unlock(&class->lock); - return false; - } - - /* - * If this is first time isolation for the zspage, isolate zspage from - * size_class to prevent further object allocation from the zspage. - */ - if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) { - get_zspage_mapping(zspage, &class_idx, &fullness); - atomic_long_inc(&pool->isolated_pages); - remove_zspage(class, zspage, fullness); - } - + migrate_write_lock(zspage); inc_zspage_isolation(zspage); - spin_unlock(&class->lock); + migrate_write_unlock(zspage); return true; } @@ -1923,16 +1841,13 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, { struct zs_pool *pool; struct size_class *class; - int class_idx; - enum fullness_group fullness; struct zspage *zspage; struct page *dummy; void *s_addr, *d_addr, *addr; - int offset, pos; - unsigned long handle, head; + int offset; + unsigned long handle; unsigned long old_obj, new_obj; unsigned int obj_idx; - int ret = -EAGAIN; /* * We cannot support the _NO_COPY case here, because copy needs to @@ -1945,35 +1860,25 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(!PageIsolated(page), page); - zspage = get_zspage(page); - - /* Concurrent compactor cannot migrate any subpage in zspage */ - migrate_write_lock(zspage); - get_zspage_mapping(zspage, &class_idx, &fullness); pool = mapping->private_data; - class = pool->size_class[class_idx]; - offset = get_first_obj_offset(page); + /* + * The pool migrate_lock protects the race between zpage migration + * and zs_free. + */ + write_lock(&pool->migrate_lock); + zspage = get_zspage(page); + class = zspage_class(pool, zspage); + + /* + * the class lock protects zpage alloc/free in the zspage. + */ spin_lock(&class->lock); - if (!get_zspage_inuse(zspage)) { - /* - * Set "offset" to end of the page so that every loops - * skips unnecessary object scanning. - */ - offset = PAGE_SIZE; - } + /* the migrate_write_lock protects zpage access via zs_map_object */ + migrate_write_lock(zspage); - pos = offset; + offset = get_first_obj_offset(page); s_addr = kmap_atomic(page); - while (pos < PAGE_SIZE) { - head = obj_to_head(page, s_addr + pos); - if (head & OBJ_ALLOCATED_TAG) { - handle = head & ~OBJ_ALLOCATED_TAG; - if (!trypin_tag(handle)) - goto unpin_objects; - } - pos += class->size; - } /* * Here, any user cannot access all objects in the zspage so let's move. 
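With object pinning gone, zs_page_migrate() just below becomes a straight copy-and-fixup: copy the whole page while the three locks exclude every accessor, then rewrite each live handle so it encodes the new page. A toy model of the fixup step over a flat handle table (assumption: the "page id" lives in the high bits of a handle, as in the encoding sketch earlier; all names are invented):

    #include <string.h>

    #define PAGE_SZ    4096
    #define NR_HANDLES 8

    /* Each live handle stores a page id in its high bits; rewriting it is
     * what record_obj() does for every allocated object in the page. */
    static unsigned long handles[NR_HANDLES];

    static void migrate_page(char *dst, const char *src,
    			 unsigned long old_id, unsigned long new_id)
    {
    	memcpy(dst, src, PAGE_SZ);	/* payload moves in one go */

    	for (int i = 0; i < NR_HANDLES; i++) {
    		if ((handles[i] >> 12) != old_id)
    			continue;	/* object lives elsewhere */
    		/* keep the low bits (offset), swap in the new page id */
    		handles[i] = (new_id << 12) | (handles[i] & 0xfff);
    	}
    }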
@@ -1982,42 +1887,30 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, memcpy(d_addr, s_addr, PAGE_SIZE); kunmap_atomic(d_addr); - for (addr = s_addr + offset; addr < s_addr + pos; + for (addr = s_addr + offset; addr < s_addr + PAGE_SIZE; addr += class->size) { - head = obj_to_head(page, addr); - if (head & OBJ_ALLOCATED_TAG) { - handle = head & ~OBJ_ALLOCATED_TAG; - BUG_ON(!testpin_tag(handle)); + if (obj_allocated(page, addr, &handle)) { old_obj = handle_to_obj(handle); obj_to_location(old_obj, &dummy, &obj_idx); new_obj = (unsigned long)location_to_obj(newpage, obj_idx); - new_obj |= BIT(HANDLE_PIN_BIT); record_obj(handle, new_obj); } } + kunmap_atomic(s_addr); replace_sub_page(class, zspage, newpage, page); - get_page(newpage); - - dec_zspage_isolation(zspage); - /* - * Page migration is done so let's putback isolated zspage to - * the list if @page is final isolated subpage in the zspage. + * Since we complete the data copy and set up new zspage structure, + * it's okay to release migration_lock. */ - if (!is_zspage_isolated(zspage)) { - /* - * We cannot race with zs_destroy_pool() here because we wait - * for isolation to hit zero before we start destroying. - * Also, we ensure that everyone can see pool->destroying before - * we start waiting. - */ - putback_zspage_deferred(pool, class, zspage); - zs_pool_dec_isolated(pool); - } + write_unlock(&pool->migrate_lock); + spin_unlock(&class->lock); + dec_zspage_isolation(zspage); + migrate_write_unlock(zspage); + get_page(newpage); if (page_zone(newpage) != page_zone(page)) { dec_zone_page_state(page, NR_ZSPAGES); inc_zone_page_state(newpage, NR_ZSPAGES); @@ -2025,55 +1918,21 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage, reset_page(page); put_page(page); - page = newpage; - - ret = MIGRATEPAGE_SUCCESS; -unpin_objects: - for (addr = s_addr + offset; addr < s_addr + pos; - addr += class->size) { - head = obj_to_head(page, addr); - if (head & OBJ_ALLOCATED_TAG) { - handle = head & ~OBJ_ALLOCATED_TAG; - BUG_ON(!testpin_tag(handle)); - unpin_tag(handle); - } - } - kunmap_atomic(s_addr); - spin_unlock(&class->lock); - migrate_write_unlock(zspage); - return ret; + return MIGRATEPAGE_SUCCESS; } static void zs_page_putback(struct page *page) { - struct zs_pool *pool; - struct size_class *class; - int class_idx; - enum fullness_group fg; - struct address_space *mapping; struct zspage *zspage; VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(!PageIsolated(page), page); zspage = get_zspage(page); - get_zspage_mapping(zspage, &class_idx, &fg); - mapping = page_mapping(page); - pool = mapping->private_data; - class = pool->size_class[class_idx]; - - spin_lock(&class->lock); + migrate_write_lock(zspage); dec_zspage_isolation(zspage); - if (!is_zspage_isolated(zspage)) { - /* - * Due to page_lock, we cannot free zspage immediately - * so let's defer. - */ - putback_zspage_deferred(pool, class, zspage); - zs_pool_dec_isolated(pool); - } - spin_unlock(&class->lock); + migrate_write_unlock(zspage); } static const struct address_space_operations zsmalloc_aops = { @@ -2095,36 +1954,8 @@ static int zs_register_migration(struct zs_pool *pool) return 0; } -static bool pool_isolated_are_drained(struct zs_pool *pool) -{ - return atomic_long_read(&pool->isolated_pages) == 0; -} - -/* Function for resolving migration */ -static void wait_for_isolated_drain(struct zs_pool *pool) -{ - - /* - * We're in the process of destroying the pool, so there are no - * active allocations. 
zs_page_isolate() fails for completely free - * zspages, so we need only wait for the zs_pool's isolated - * count to hit zero. - */ - wait_event(pool->migration_wait, - pool_isolated_are_drained(pool)); -} - static void zs_unregister_migration(struct zs_pool *pool) { - pool->destroying = true; - /* - * We need a memory barrier here to ensure global visibility of - * pool->destroying. Thus pool->isolated pages will either be 0 in which - * case we don't care, or it will be > 0 and pool->destroying will - * ensure that we wake up once isolation hits 0. - */ - smp_mb(); - wait_for_isolated_drain(pool); /* This can block */ flush_work(&pool->free_work); iput(pool->inode); } @@ -2154,7 +1985,6 @@ static void async_free_zspage(struct work_struct *work) spin_unlock(&class->lock); } - list_for_each_entry_safe(zspage, tmp, &free_pages, list) { list_del(&zspage->list); lock_zspage(zspage); @@ -2218,8 +2048,13 @@ static unsigned long __zs_compact(struct zs_pool *pool, struct zspage *dst_zspage = NULL; unsigned long pages_freed = 0; + /* protect the race between zpage migration and zs_free */ + write_lock(&pool->migrate_lock); + /* protect zpage allocation/free */ spin_lock(&class->lock); while ((src_zspage = isolate_zspage(class, true))) { + /* protect someone accessing the zspage(i.e., zs_map_object) */ + migrate_write_lock(src_zspage); if (!zs_can_compact(class)) break; @@ -2228,6 +2063,8 @@ static unsigned long __zs_compact(struct zs_pool *pool, cc.s_page = get_first_page(src_zspage); while ((dst_zspage = isolate_zspage(class, false))) { + migrate_write_lock_nested(dst_zspage); + cc.d_page = get_first_page(dst_zspage); /* * If there is no more space in dst_page, resched @@ -2237,6 +2074,10 @@ static unsigned long __zs_compact(struct zs_pool *pool, break; putback_zspage(class, dst_zspage); + migrate_write_unlock(dst_zspage); + dst_zspage = NULL; + if (rwlock_is_contended(&pool->migrate_lock)) + break; } /* Stop if we couldn't find slot */ @@ -2244,19 +2085,28 @@ static unsigned long __zs_compact(struct zs_pool *pool, break; putback_zspage(class, dst_zspage); + migrate_write_unlock(dst_zspage); + if (putback_zspage(class, src_zspage) == ZS_EMPTY) { + migrate_write_unlock(src_zspage); free_zspage(pool, class, src_zspage); pages_freed += class->pages_per_zspage; - } + } else + migrate_write_unlock(src_zspage); spin_unlock(&class->lock); + write_unlock(&pool->migrate_lock); cond_resched(); + write_lock(&pool->migrate_lock); spin_lock(&class->lock); } - if (src_zspage) + if (src_zspage) { putback_zspage(class, src_zspage); + migrate_write_unlock(src_zspage); + } spin_unlock(&class->lock); + write_unlock(&pool->migrate_lock); return pages_freed; } @@ -2362,15 +2212,12 @@ struct zs_pool *zs_create_pool(const char *name) return NULL; init_deferred_free(pool); + rwlock_init(&pool->migrate_lock); pool->name = kstrdup(name, GFP_KERNEL); if (!pool->name) goto err; -#ifdef CONFIG_COMPACTION - init_waitqueue_head(&pool->migration_wait); -#endif - if (create_cache(pool)) goto err; diff --git a/mm/zswap.c b/mm/zswap.c index 7944e3e57e781525738ad1e4ab8ffdf49086eef4..cdf6950fcb2e39718864e38fddab7aa22feaa818 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -1378,7 +1378,7 @@ static void zswap_frontswap_init(unsigned type) zswap_trees[type] = tree; } -static struct frontswap_ops zswap_frontswap_ops = { +static const struct frontswap_ops zswap_frontswap_ops = { .store = zswap_frontswap_store, .load = zswap_frontswap_load, .invalidate_page = zswap_frontswap_invalidate_page, @@ -1475,11 +1475,15 @@ static int __init 
init_zswap(void) if (!shrink_wq) goto fallback_fail; - frontswap_register_ops(&zswap_frontswap_ops); + ret = frontswap_register_ops(&zswap_frontswap_ops); + if (ret) + goto destroy_wq; if (zswap_debugfs_init()) pr_warn("debugfs initialization failed\n"); return 0; +destroy_wq: + destroy_workqueue(shrink_wq); fallback_fail: if (pool) zswap_pool_destroy(pool); diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 1a705a4ef7fa8ead466b0f12fa88b5f2d9b0081f..5eaf38875554b0dcfdf2bba4c00f42da2786f2b1 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -129,6 +129,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev, u32 skb_prio, u16 vlan_prio); int vlan_dev_set_egress_priority(const struct net_device *dev, u32 skb_prio, u16 vlan_prio); +void vlan_dev_free_egress_priority(const struct net_device *dev); int vlan_dev_change_flags(const struct net_device *dev, u32 flag, u32 mask); void vlan_dev_get_realdev_name(const struct net_device *dev, char *result, size_t size); @@ -139,7 +140,6 @@ int vlan_check_real_dev(struct net_device *real_dev, void vlan_setup(struct net_device *dev); int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack); void unregister_vlan_dev(struct net_device *dev, struct list_head *head); -void vlan_dev_uninit(struct net_device *dev); bool vlan_dev_inherit_address(struct net_device *dev, struct net_device *real_dev); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 26d031a43cc1a70aa94a5f3efe895efacb612cad..d1902828a18aaf7b67ccbdfdf288b705a4a65e62 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -622,7 +622,7 @@ static int vlan_dev_init(struct net_device *dev) } /* Note: this function might be called multiple times for the same device. */ -void vlan_dev_uninit(struct net_device *dev) +void vlan_dev_free_egress_priority(const struct net_device *dev) { struct vlan_priority_tci_mapping *pm; struct vlan_dev_priv *vlan = vlan_dev_priv(dev); @@ -636,6 +636,16 @@ void vlan_dev_uninit(struct net_device *dev) } } +static void vlan_dev_uninit(struct net_device *dev) +{ + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + + vlan_dev_free_egress_priority(dev); + + /* Get rid of the vlan's reference to real_dev */ + dev_put_track(vlan->real_dev, &vlan->dev_tracker); +} + static netdev_features_t vlan_dev_fix_features(struct net_device *dev, netdev_features_t features) { @@ -846,9 +856,6 @@ static void vlan_dev_free(struct net_device *dev) free_percpu(vlan->vlan_pcpu_stats); vlan->vlan_pcpu_stats = NULL; - - /* Get rid of the vlan's reference to real_dev */ - dev_put_track(vlan->real_dev, &vlan->dev_tracker); } void vlan_setup(struct net_device *dev) diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 0db85aeb119b88fae1abaf7d0a10019ba4044915..53b1955b027f89245eb8dd46ede9d7bfd6e553a3 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -183,10 +183,11 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, return -EINVAL; err = vlan_changelink(dev, tb, data, extack); - if (!err) - err = register_vlan_dev(dev, extack); if (err) - vlan_dev_uninit(dev); + return err; + err = register_vlan_dev(dev, extack); + if (err) + vlan_dev_free_egress_priority(dev); return err; } diff --git a/net/atm/proc.c b/net/atm/proc.c index 4369ffa3302a26cdd575a7045602548199d39300..9bf736290e48c9e122bf62ea247966a3a75c2959 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -108,7 +108,7 @@ out: static inline void *vcc_walk(struct seq_file *seq, loff_t l) { struct vcc_state *state = seq->private; - 
int family = (uintptr_t)(PDE_DATA(file_inode(seq->file))); + int family = (uintptr_t)(pde_data(file_inode(seq->file))); return __vcc_walk(&state->sk, family, &state->bucket, l) ? state : NULL; @@ -324,7 +324,7 @@ static ssize_t proc_dev_atm_read(struct file *file, char __user *buf, page = get_zeroed_page(GFP_KERNEL); if (!page) return -ENOMEM; - dev = PDE_DATA(file_inode(file)); + dev = pde_data(file_inode(file)); if (!dev->ops->proc_read) length = -EINVAL; else { diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 02f43f3e2c5640791aa013b0afc902049e44e26d..d53cbb4e25035f43ee57fbb671758d386e7ad9b2 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -77,6 +77,7 @@ static void ax25_kill_by_device(struct net_device *dev) { ax25_dev *ax25_dev; ax25_cb *s; + struct sock *sk; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; @@ -85,13 +86,17 @@ static void ax25_kill_by_device(struct net_device *dev) again: ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { + sk = s->sk; + sock_hold(sk); spin_unlock_bh(&ax25_list_lock); - lock_sock(s->sk); + lock_sock(sk); s->ax25_dev = NULL; - release_sock(s->sk); + dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + ax25_dev_put(ax25_dev); ax25_disconnect(s, ENETUNREACH); + release_sock(sk); spin_lock_bh(&ax25_list_lock); - + sock_put(sk); /* The entry could have been deleted from the * list meanwhile and thus the next pointer is * no longer valid. Play it safe and restart @@ -355,21 +360,25 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl))) return -EFAULT; - if ((ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr)) == NULL) - return -ENODEV; - if (ax25_ctl.digi_count > AX25_MAX_DIGIS) return -EINVAL; if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL) return -EINVAL; + ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr); + if (!ax25_dev) + return -ENODEV; + digi.ndigi = ax25_ctl.digi_count; for (k = 0; k < digi.ndigi; k++) digi.calls[k] = ax25_ctl.digi_addr[k]; - if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev)) == NULL) + ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev); + if (!ax25) { + ax25_dev_put(ax25_dev); return -ENOTCONN; + } switch (ax25_ctl.cmd) { case AX25_KILL: @@ -436,6 +445,7 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) } out_put: + ax25_dev_put(ax25_dev); ax25_cb_put(ax25); return ret; @@ -1107,8 +1117,10 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } } - if (ax25_dev != NULL) + if (ax25_dev) { ax25_fillin_cb(ax25, ax25_dev); + dev_hold_track(ax25_dev->dev, &ax25_dev->dev_tracker, GFP_ATOMIC); + } done: ax25_cb_add(ax25); diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 256fadb94df3125ddc2f5ed5f8b1c91d6e243546..d2a244e1c260f3d316237f0aea2d0ec80f60570f 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -37,6 +37,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr) for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) if (ax25cmp(addr, (const ax25_address *)ax25_dev->dev->dev_addr) == 0) { res = ax25_dev; + ax25_dev_hold(ax25_dev); } spin_unlock_bh(&ax25_dev_lock); @@ -56,6 +57,7 @@ void ax25_dev_device_up(struct net_device *dev) return; } + refcount_set(&ax25_dev->refcount, 1); dev->ax25_ptr = ax25_dev; ax25_dev->dev = dev; dev_hold_track(dev, &ax25_dev->dev_tracker, GFP_ATOMIC); @@ -84,6 +86,7 @@ void ax25_dev_device_up(struct net_device *dev) 
ax25_dev->next = ax25_dev_list; ax25_dev_list = ax25_dev; spin_unlock_bh(&ax25_dev_lock); + ax25_dev_hold(ax25_dev); ax25_register_dev_sysctl(ax25_dev); } @@ -113,9 +116,10 @@ void ax25_dev_device_down(struct net_device *dev) if ((s = ax25_dev_list) == ax25_dev) { ax25_dev_list = s->next; spin_unlock_bh(&ax25_dev_lock); + ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; dev_put_track(dev, &ax25_dev->dev_tracker); - kfree(ax25_dev); + ax25_dev_put(ax25_dev); return; } @@ -123,9 +127,10 @@ void ax25_dev_device_down(struct net_device *dev) if (s->next == ax25_dev) { s->next = ax25_dev->next; spin_unlock_bh(&ax25_dev_lock); + ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; dev_put_track(dev, &ax25_dev->dev_tracker); - kfree(ax25_dev); + ax25_dev_put(ax25_dev); return; } @@ -133,6 +138,7 @@ void ax25_dev_device_down(struct net_device *dev) } spin_unlock_bh(&ax25_dev_lock); dev->ax25_ptr = NULL; + ax25_dev_put(ax25_dev); } int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) @@ -144,20 +150,32 @@ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) switch (cmd) { case SIOCAX25ADDFWD: - if ((fwd_dev = ax25_addr_ax25dev(&fwd->port_to)) == NULL) + fwd_dev = ax25_addr_ax25dev(&fwd->port_to); + if (!fwd_dev) { + ax25_dev_put(ax25_dev); return -EINVAL; - if (ax25_dev->forward != NULL) + } + if (ax25_dev->forward) { + ax25_dev_put(fwd_dev); + ax25_dev_put(ax25_dev); return -EINVAL; + } ax25_dev->forward = fwd_dev->dev; + ax25_dev_put(fwd_dev); + ax25_dev_put(ax25_dev); break; case SIOCAX25DELFWD: - if (ax25_dev->forward == NULL) + if (!ax25_dev->forward) { + ax25_dev_put(ax25_dev); return -EINVAL; + } ax25_dev->forward = NULL; + ax25_dev_put(ax25_dev); break; default: + ax25_dev_put(ax25_dev); return -EINVAL; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index d0b2e094bd552f8fa6d767c02a87a9bf317dd709..9751207f7757270d2fa6d727c917f4930bc60f78 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -75,11 +75,13 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_dev *ax25_dev; int i; - if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL) - return -EINVAL; if (route->digi_count > AX25_MAX_DIGIS) return -EINVAL; + ax25_dev = ax25_addr_ax25dev(&route->port_addr); + if (!ax25_dev) + return -EINVAL; + write_lock_bh(&ax25_route_lock); ax25_rt = ax25_route_list; @@ -91,6 +93,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if (route->digi_count != 0) { if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; @@ -101,6 +104,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) } } write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } ax25_rt = ax25_rt->next; @@ -108,6 +112,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return -ENOMEM; } @@ -120,6 +125,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); kfree(ax25_rt); + ax25_dev_put(ax25_dev); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; @@ -132,6 +138,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_rt->next = ax25_route_list; ax25_route_list = ax25_rt; 
write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } @@ -173,6 +180,7 @@ static int ax25_rt_del(struct ax25_routes_struct *route) } } write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } @@ -215,6 +223,7 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option) out: write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return err; } diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 1661979b6a6e8d0f4f64d0df0ded0536cc7918c7..ee319779781e64657d6e7c3090086c94a5ec6cb7 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -611,7 +611,7 @@ EXPORT_SYMBOL(bt_sock_wait_ready); static void *bt_seq_start(struct seq_file *seq, loff_t *pos) __acquires(seq->private->l->lock) { - struct bt_sock_list *l = PDE_DATA(file_inode(seq->file)); + struct bt_sock_list *l = pde_data(file_inode(seq->file)); read_lock(&l->lock); return seq_hlist_start_head(&l->head, *pos); @@ -619,7 +619,7 @@ static void *bt_seq_start(struct seq_file *seq, loff_t *pos) static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct bt_sock_list *l = PDE_DATA(file_inode(seq->file)); + struct bt_sock_list *l = pde_data(file_inode(seq->file)); return seq_hlist_next(v, &l->head, pos); } @@ -627,14 +627,14 @@ static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void bt_seq_stop(struct seq_file *seq, void *v) __releases(seq->private->l->lock) { - struct bt_sock_list *l = PDE_DATA(file_inode(seq->file)); + struct bt_sock_list *l = pde_data(file_inode(seq->file)); read_unlock(&l->lock); } static int bt_seq_show(struct seq_file *seq, void *v) { - struct bt_sock_list *l = PDE_DATA(file_inode(seq->file)); + struct bt_sock_list *l = pde_data(file_inode(seq->file)); if (v == SEQ_START_TOKEN) { seq_puts(seq, "sk RefCnt Rmem Wmem User Inode Parent"); diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 84ba456a78cce0484573ca6ab2c56ea611e5a731..1402d5ca242dc3d74c74b12d897efe56e6a92cb8 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -560,10 +560,10 @@ static bool __allowed_ingress(const struct net_bridge *br, !br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { if (*state == BR_STATE_FORWARDING) { *state = br_vlan_get_pvid_state(vg); - return br_vlan_state_allowed(*state, true); - } else { - return true; + if (!br_vlan_state_allowed(*state, true)) + goto drop; } + return true; } } v = br_vlan_find(vg, *vid); @@ -2020,7 +2020,8 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb) goto out_err; } err = br_vlan_dump_dev(dev, skb, cb, dump_flags); - if (err && err != -EMSGSIZE) + /* if the dump completed without an error we return 0 here */ + if (err != -EMSGSIZE) goto out_err; } else { for_each_netdev_rcu(net, dev) { diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index eba0efe64d05a7da6033762517a6be6335c0d1f8..fbf858ddec352183b5c7dcb6e2611ca29a50845f 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -49,7 +49,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct net *net, { struct sk_buff *nskb; - nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, dev, hook); + nskb = nf_reject_skb_v4_tcp_reset(net, oldskb, NULL, hook); if (!nskb) return; @@ -65,7 +65,7 @@ static void nft_reject_br_send_v4_unreach(struct net *net, { struct sk_buff *nskb; - nskb = nf_reject_skb_v4_unreach(net, oldskb, dev, hook, code); + nskb = nf_reject_skb_v4_unreach(net, oldskb, NULL, hook, 
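
The PDE_DATA() to pde_data() conversions in af_bluetooth above, like the rest in this series, are mechanical: the lower-case helper simply returns the private pointer registered when the /proc entry was created. A typical pairing, as a kernel-style sketch rather than buildable code (struct my_stats and its field are hypothetical):

static int my_show(struct seq_file *m, void *v)
{
	/* recover whatever proc_create_single_data() stashed at create time */
	struct my_stats *st = pde_data(file_inode(m->file));

	seq_printf(m, "count %lu\n", st->count);
	return 0;
}

/* registration side:
 *	proc_create_single_data("my_stats", 0444, parent, my_show, st);
 */
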
code); if (!nskb) return; @@ -81,7 +81,7 @@ static void nft_reject_br_send_v6_tcp_reset(struct net *net, { struct sk_buff *nskb; - nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, dev, hook); + nskb = nf_reject_skb_v6_tcp_reset(net, oldskb, NULL, hook); if (!nskb) return; @@ -98,7 +98,7 @@ static void nft_reject_br_send_v6_unreach(struct net *net, { struct sk_buff *nskb; - nskb = nf_reject_skb_v6_unreach(net, oldskb, dev, hook, code); + nskb = nf_reject_skb_v6_unreach(net, oldskb, NULL, hook, code); if (!nskb) return; diff --git a/net/can/bcm.c b/net/can/bcm.c index bc88d901a1c0c8d3d28a5d0ce7ed63c3528efd82..95d209b52e6a6df1b3df9504afc2c486c8271961 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -193,7 +193,7 @@ static int bcm_proc_show(struct seq_file *m, void *v) { char ifname[IFNAMSIZ]; struct net *net = m->private; - struct sock *sk = (struct sock *)PDE_DATA(m->file->f_inode); + struct sock *sk = (struct sock *)pde_data(m->file->f_inode); struct bcm_sock *bo = bcm_sk(sk); struct bcm_op *op; diff --git a/net/can/isotp.c b/net/can/isotp.c index 02cbcb2ecf0dbc58f458fd6127044113e4a368ba..d2a430b6a13bd2974adf02be1fe14e1886db7439 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -145,6 +146,7 @@ struct isotp_sock { struct tpcon rx, tx; struct list_head notifier; wait_queue_head_t wait; + spinlock_t rx_lock; /* protect single thread state machine */ }; static LIST_HEAD(isotp_notifier_list); @@ -615,11 +617,17 @@ static void isotp_rcv(struct sk_buff *skb, void *data) n_pci_type = cf->data[ae] & 0xF0; + /* Make sure the state changes and data structures stay consistent at + * CAN frame reception time. This locking is not needed in real world + * use cases but the inconsistency can be triggered with syzkaller. 
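
The rx_lock field added to struct isotp_sock above makes each receive-side check plus state transition atomic; without it, the half-duplex test and the transition could interleave between two contexts, which is the inconsistency syzkaller was hitting. A compact pthread analogue (hypothetical names):

#include <pthread.h>

enum rx_state { RX_IDLE, RX_IN_PROGRESS };

struct rx_sm {
	pthread_mutex_t lock;	/* stands in for spin_lock(&so->rx_lock) */
	enum rx_state state;
};

static void rx_frame(struct rx_sm *sm, int is_flow_control)
{
	pthread_mutex_lock(&sm->lock);

	/* the duplex check and the transition are now one atomic unit */
	if (sm->state != RX_IDLE && is_flow_control)
		goto out_unlock;

	sm->state = RX_IN_PROGRESS;
	/* ... consume the frame ... */
	sm->state = RX_IDLE;

out_unlock:
	pthread_mutex_unlock(&sm->lock);
}

int main(void)
{
	struct rx_sm sm = { PTHREAD_MUTEX_INITIALIZER, RX_IDLE };

	rx_frame(&sm, 0);
	return 0;
}
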
+ */ + spin_lock(&so->rx_lock); + if (so->opt.flags & CAN_ISOTP_HALF_DUPLEX) { /* check rx/tx path half duplex expectations */ if ((so->tx.state != ISOTP_IDLE && n_pci_type != N_PCI_FC) || (so->rx.state != ISOTP_IDLE && n_pci_type == N_PCI_FC)) - return; + goto out_unlock; } switch (n_pci_type) { @@ -668,6 +676,9 @@ static void isotp_rcv(struct sk_buff *skb, void *data) isotp_rcv_cf(sk, cf, ae, skb); break; } + +out_unlock: + spin_unlock(&so->rx_lock); } static void isotp_fill_dataframe(struct canfd_frame *cf, struct isotp_sock *so, @@ -876,7 +887,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (!size || size > MAX_MSG_LENGTH) { err = -EINVAL; - goto err_out; + goto err_out_drop; } /* take care of a potential SF_DL ESC offset for TX_DL > 8 */ @@ -886,24 +897,24 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) && (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) { err = -EINVAL; - goto err_out; + goto err_out_drop; } err = memcpy_from_msg(so->tx.buf, msg, size); if (err < 0) - goto err_out; + goto err_out_drop; dev = dev_get_by_index(sock_net(sk), so->ifindex); if (!dev) { err = -ENXIO; - goto err_out; + goto err_out_drop; } skb = sock_alloc_send_skb(sk, so->ll.mtu + sizeof(struct can_skb_priv), msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) { dev_put(dev); - goto err_out; + goto err_out_drop; } can_skb_reserve(skb); @@ -965,7 +976,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (err) { pr_notice_once("can-isotp: %s: can_send_ret %pe\n", __func__, ERR_PTR(err)); - goto err_out; + goto err_out_drop; } if (wait_tx_done) { @@ -978,6 +989,9 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) return size; +err_out_drop: + /* drop this PDU and unlock a potential wait queue */ + old_state = ISOTP_IDLE; err_out: so->tx.state = old_state; if (so->tx.state == ISOTP_IDLE) @@ -1444,6 +1458,7 @@ static int isotp_init(struct sock *sk) so->txtimer.function = isotp_tx_timer_handler; init_waitqueue_head(&so->wait); + spin_lock_init(&so->rx_lock); spin_lock(&isotp_notifier_lock); list_add_tail(&so->notifier, &isotp_notifier_list); diff --git a/net/can/proc.c b/net/can/proc.c index b3099f0a3cb824772e21f7c4bc0c2d1392669dee..bbce97825f13fbf725261e572271a9353d51897d 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -305,7 +305,7 @@ static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx, static int can_rcvlist_proc_show(struct seq_file *m, void *v) { /* double cast to prevent GCC warning */ - int idx = (int)(long)PDE_DATA(m->file->f_inode); + int idx = (int)(long)pde_data(m->file->f_inode); struct net_device *dev; struct can_dev_rcv_lists *dev_rcv_lists; struct net *net = m->private; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index ecc400a0b7bbf0bd921de1ae93c521219193c3e1..4c6441536d55b6323f4b9d93b5d4837cd4ec880c 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -246,6 +246,7 @@ enum { Opt_cephx_sign_messages, Opt_tcp_nodelay, Opt_abort_on_full, + Opt_rxbounce, }; enum { @@ -295,6 +296,7 @@ static const struct fs_parameter_spec ceph_parameters[] = { fsparam_u32 ("osdkeepalive", Opt_osdkeepalivetimeout), fsparam_enum ("read_from_replica", Opt_read_from_replica, ceph_param_read_from_replica), + fsparam_flag ("rxbounce", Opt_rxbounce), fsparam_enum ("ms_mode", Opt_ms_mode, ceph_param_ms_mode), fsparam_string ("secret", Opt_secret), @@ -584,6 +586,9 @@ int ceph_parse_param(struct 
fs_parameter *param, struct ceph_options *opt, case Opt_abort_on_full: opt->flags |= CEPH_OPT_ABORT_ON_FULL; break; + case Opt_rxbounce: + opt->flags |= CEPH_OPT_RXBOUNCE; + break; default: BUG(); @@ -660,6 +665,8 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client, seq_puts(m, "notcp_nodelay,"); if (show_all && (opt->flags & CEPH_OPT_ABORT_ON_FULL)) seq_puts(m, "abort_on_full,"); + if (opt->flags & CEPH_OPT_RXBOUNCE) + seq_puts(m, "rxbounce,"); if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) seq_printf(m, "mount_timeout=%d,", diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 45eba2dcb67aab0054f243c28771a5a4602f9128..d3bb656308b432e0abc3ce2a3c221f40470bd5ef 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -515,6 +515,10 @@ static void ceph_con_reset_protocol(struct ceph_connection *con) ceph_msg_put(con->out_msg); con->out_msg = NULL; } + if (con->bounce_page) { + __free_page(con->bounce_page); + con->bounce_page = NULL; + } if (ceph_msgr2(from_msgr(con->msgr))) ceph_con_v2_reset_protocol(con); diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c index 2cb5ffdf071af76a679a0f1cdae2f7b1a13a909d..6b014eca3a1305e48aeb62d533951ad98c68ad78 100644 --- a/net/ceph/messenger_v1.c +++ b/net/ceph/messenger_v1.c @@ -992,8 +992,7 @@ static int read_partial_message_section(struct ceph_connection *con, static int read_partial_msg_data(struct ceph_connection *con) { - struct ceph_msg *msg = con->in_msg; - struct ceph_msg_data_cursor *cursor = &msg->cursor; + struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); struct page *page; size_t page_offset; @@ -1001,9 +1000,6 @@ static int read_partial_msg_data(struct ceph_connection *con) u32 crc = 0; int ret; - if (!msg->num_data_items) - return -EIO; - if (do_datacrc) crc = con->in_data_crc; while (cursor->total_resid) { @@ -1031,6 +1027,46 @@ static int read_partial_msg_data(struct ceph_connection *con) return 1; /* must return > 0 to indicate success */ } +static int read_partial_msg_data_bounce(struct ceph_connection *con) +{ + struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; + struct page *page; + size_t off, len; + u32 crc; + int ret; + + if (unlikely(!con->bounce_page)) { + con->bounce_page = alloc_page(GFP_NOIO); + if (!con->bounce_page) { + pr_err("failed to allocate bounce page\n"); + return -ENOMEM; + } + } + + crc = con->in_data_crc; + while (cursor->total_resid) { + if (!cursor->resid) { + ceph_msg_data_advance(cursor, 0); + continue; + } + + page = ceph_msg_data_next(cursor, &off, &len, NULL); + ret = ceph_tcp_recvpage(con->sock, con->bounce_page, 0, len); + if (ret <= 0) { + con->in_data_crc = crc; + return ret; + } + + crc = crc32c(crc, page_address(con->bounce_page), ret); + memcpy_to_page(page, off, page_address(con->bounce_page), ret); + + ceph_msg_data_advance(cursor, ret); + } + con->in_data_crc = crc; + + return 1; /* must return > 0 to indicate success */ +} + /* * read (part of) a message. 
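
read_partial_msg_data_bounce() above is the substance of the new rxbounce option: receive into a connection-private page, checksum that stable copy, and only then publish it to the destination. If the destination page can change underneath, for instance a mapped page still visible to writers, checksumming it directly can race. Reduced shape, with crc32c_update() only declared here as an assumed helper:

#include <string.h>
#include <stddef.h>
#include <stdint.h>

uint32_t crc32c_update(uint32_t crc, const void *buf, size_t len); /* assumed */

static uint32_t recv_with_bounce(int (*recv_chunk)(void *buf, size_t len),
				 unsigned char *dst, size_t len, uint32_t crc)
{
	static unsigned char bounce[4096];	/* the private "page" */
	size_t want = len < sizeof(bounce) ? len : sizeof(bounce);
	int n = recv_chunk(bounce, want);

	if (n > 0) {
		crc = crc32c_update(crc, bounce, n);	/* CRC the stable copy */
		memcpy(dst, bounce, n);			/* then copy it out */
	}
	return crc;
}
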
*/ @@ -1141,7 +1177,13 @@ static int read_partial_message(struct ceph_connection *con) /* (page) data */ if (data_len) { - ret = read_partial_msg_data(con); + if (!m->num_data_items) + return -EIO; + + if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) + ret = read_partial_msg_data_bounce(con); + else + ret = read_partial_msg_data(con); if (ret <= 0) return ret; } diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c index c4099b641b381c191625c2ddefb1e6564d3ca036..c81379f93ad51e4dd5b37f71f38f8e16affd4f19 100644 --- a/net/ceph/messenger_v2.c +++ b/net/ceph/messenger_v2.c @@ -57,8 +57,9 @@ #define IN_S_HANDLE_CONTROL_REMAINDER 3 #define IN_S_PREPARE_READ_DATA 4 #define IN_S_PREPARE_READ_DATA_CONT 5 -#define IN_S_HANDLE_EPILOGUE 6 -#define IN_S_FINISH_SKIP 7 +#define IN_S_PREPARE_READ_ENC_PAGE 6 +#define IN_S_HANDLE_EPILOGUE 7 +#define IN_S_FINISH_SKIP 8 #define OUT_S_QUEUE_DATA 1 #define OUT_S_QUEUE_DATA_CONT 2 @@ -1032,22 +1033,41 @@ static int decrypt_control_remainder(struct ceph_connection *con) padded_len(rem_len) + CEPH_GCM_TAG_LEN); } -static int decrypt_message(struct ceph_connection *con) +static int decrypt_tail(struct ceph_connection *con) { + struct sg_table enc_sgt = {}; struct sg_table sgt = {}; + int tail_len; int ret; + tail_len = tail_onwire_len(con->in_msg, true); + ret = sg_alloc_table_from_pages(&enc_sgt, con->v2.in_enc_pages, + con->v2.in_enc_page_cnt, 0, tail_len, + GFP_NOIO); + if (ret) + goto out; + ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf), MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf), con->v2.in_buf, true); if (ret) goto out; - ret = gcm_crypt(con, false, sgt.sgl, sgt.sgl, - tail_onwire_len(con->in_msg, true)); + dout("%s con %p msg %p enc_page_cnt %d sg_cnt %d\n", __func__, con, + con->in_msg, con->v2.in_enc_page_cnt, sgt.orig_nents); + ret = gcm_crypt(con, false, enc_sgt.sgl, sgt.sgl, tail_len); + if (ret) + goto out; + + WARN_ON(!con->v2.in_enc_page_cnt); + ceph_release_page_vector(con->v2.in_enc_pages, + con->v2.in_enc_page_cnt); + con->v2.in_enc_pages = NULL; + con->v2.in_enc_page_cnt = 0; out: sg_free_table(&sgt); + sg_free_table(&enc_sgt); return ret; } @@ -1733,54 +1753,157 @@ static int prepare_read_control_remainder(struct ceph_connection *con) return 0; } -static void prepare_read_data(struct ceph_connection *con) +static int prepare_read_data(struct ceph_connection *con) { struct bio_vec bv; - if (!con_secure(con)) - con->in_data_crc = -1; + con->in_data_crc = -1; ceph_msg_data_cursor_init(&con->v2.in_cursor, con->in_msg, data_len(con->in_msg)); get_bvec_at(&con->v2.in_cursor, &bv); - set_in_bvec(con, &bv); + if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) { + if (unlikely(!con->bounce_page)) { + con->bounce_page = alloc_page(GFP_NOIO); + if (!con->bounce_page) { + pr_err("failed to allocate bounce page\n"); + return -ENOMEM; + } + } + + bv.bv_page = con->bounce_page; + bv.bv_offset = 0; + set_in_bvec(con, &bv); + } else { + set_in_bvec(con, &bv); + } con->v2.in_state = IN_S_PREPARE_READ_DATA_CONT; + return 0; } static void prepare_read_data_cont(struct ceph_connection *con) { struct bio_vec bv; - if (!con_secure(con)) + if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) { + con->in_data_crc = crc32c(con->in_data_crc, + page_address(con->bounce_page), + con->v2.in_bvec.bv_len); + + get_bvec_at(&con->v2.in_cursor, &bv); + memcpy_to_page(bv.bv_page, bv.bv_offset, + page_address(con->bounce_page), + con->v2.in_bvec.bv_len); + } else { con->in_data_crc = ceph_crc32c_page(con->in_data_crc, 
con->v2.in_bvec.bv_page, con->v2.in_bvec.bv_offset, con->v2.in_bvec.bv_len); + } ceph_msg_data_advance(&con->v2.in_cursor, con->v2.in_bvec.bv_len); if (con->v2.in_cursor.total_resid) { get_bvec_at(&con->v2.in_cursor, &bv); - set_in_bvec(con, &bv); + if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) { + bv.bv_page = con->bounce_page; + bv.bv_offset = 0; + set_in_bvec(con, &bv); + } else { + set_in_bvec(con, &bv); + } WARN_ON(con->v2.in_state != IN_S_PREPARE_READ_DATA_CONT); return; } /* - * We've read all data. Prepare to read data padding (if any) - * and epilogue. + * We've read all data. Prepare to read epilogue. */ reset_in_kvecs(con); - if (con_secure(con)) { - if (need_padding(data_len(con->in_msg))) - add_in_kvec(con, DATA_PAD(con->v2.in_buf), - padding_len(data_len(con->in_msg))); - add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_SECURE_LEN); + add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN); + con->v2.in_state = IN_S_HANDLE_EPILOGUE; +} + +static int prepare_read_tail_plain(struct ceph_connection *con) +{ + struct ceph_msg *msg = con->in_msg; + + if (!front_len(msg) && !middle_len(msg)) { + WARN_ON(!data_len(msg)); + return prepare_read_data(con); + } + + reset_in_kvecs(con); + if (front_len(msg)) { + add_in_kvec(con, msg->front.iov_base, front_len(msg)); + WARN_ON(msg->front.iov_len != front_len(msg)); + } + if (middle_len(msg)) { + add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg)); + WARN_ON(msg->middle->vec.iov_len != middle_len(msg)); + } + + if (data_len(msg)) { + con->v2.in_state = IN_S_PREPARE_READ_DATA; } else { add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN); + con->v2.in_state = IN_S_HANDLE_EPILOGUE; } + return 0; +} + +static void prepare_read_enc_page(struct ceph_connection *con) +{ + struct bio_vec bv; + + dout("%s con %p i %d resid %d\n", __func__, con, con->v2.in_enc_i, + con->v2.in_enc_resid); + WARN_ON(!con->v2.in_enc_resid); + + bv.bv_page = con->v2.in_enc_pages[con->v2.in_enc_i]; + bv.bv_offset = 0; + bv.bv_len = min(con->v2.in_enc_resid, (int)PAGE_SIZE); + + set_in_bvec(con, &bv); + con->v2.in_enc_i++; + con->v2.in_enc_resid -= bv.bv_len; + + if (con->v2.in_enc_resid) { + con->v2.in_state = IN_S_PREPARE_READ_ENC_PAGE; + return; + } + + /* + * We are set to read the last piece of ciphertext (ending + * with epilogue) + auth tag. 
+ */ + WARN_ON(con->v2.in_enc_i != con->v2.in_enc_page_cnt); con->v2.in_state = IN_S_HANDLE_EPILOGUE; } +static int prepare_read_tail_secure(struct ceph_connection *con) +{ + struct page **enc_pages; + int enc_page_cnt; + int tail_len; + + tail_len = tail_onwire_len(con->in_msg, true); + WARN_ON(!tail_len); + + enc_page_cnt = calc_pages_for(0, tail_len); + enc_pages = ceph_alloc_page_vector(enc_page_cnt, GFP_NOIO); + if (IS_ERR(enc_pages)) + return PTR_ERR(enc_pages); + + WARN_ON(con->v2.in_enc_pages || con->v2.in_enc_page_cnt); + con->v2.in_enc_pages = enc_pages; + con->v2.in_enc_page_cnt = enc_page_cnt; + con->v2.in_enc_resid = tail_len; + con->v2.in_enc_i = 0; + + prepare_read_enc_page(con); + return 0; +} + static void __finish_skip(struct ceph_connection *con) { con->in_seq++; @@ -2589,47 +2712,26 @@ static int __handle_control(struct ceph_connection *con, void *p) } msg = con->in_msg; /* set in process_message_header() */ - if (!front_len(msg) && !middle_len(msg)) { - if (!data_len(msg)) - return process_message(con); - - prepare_read_data(con); - return 0; - } - - reset_in_kvecs(con); if (front_len(msg)) { WARN_ON(front_len(msg) > msg->front_alloc_len); - add_in_kvec(con, msg->front.iov_base, front_len(msg)); msg->front.iov_len = front_len(msg); - - if (con_secure(con) && need_padding(front_len(msg))) - add_in_kvec(con, FRONT_PAD(con->v2.in_buf), - padding_len(front_len(msg))); } else { msg->front.iov_len = 0; } if (middle_len(msg)) { WARN_ON(middle_len(msg) > msg->middle->alloc_len); - add_in_kvec(con, msg->middle->vec.iov_base, middle_len(msg)); msg->middle->vec.iov_len = middle_len(msg); - - if (con_secure(con) && need_padding(middle_len(msg))) - add_in_kvec(con, MIDDLE_PAD(con->v2.in_buf), - padding_len(middle_len(msg))); } else if (msg->middle) { msg->middle->vec.iov_len = 0; } - if (data_len(msg)) { - con->v2.in_state = IN_S_PREPARE_READ_DATA; - } else { - add_in_kvec(con, con->v2.in_buf, - con_secure(con) ? 
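
prepare_read_enc_page() above walks the freshly allocated page vector in at-most-PAGE_SIZE bites, keeping an index and a residual count until the whole ciphertext tail, epilogue and auth tag included, has arrived. The cursor arithmetic in isolation (hypothetical types):

#define PAGE_SZ 4096

struct enc_cursor {
	unsigned char **pages;	/* roughly, a ceph_alloc_page_vector() result */
	int page_cnt;
	int i;			/* next page to fill */
	int resid;		/* ciphertext bytes still expected */
};

/* Pick the next receive target; resid == 0 afterwards means "decrypt now". */
static int next_enc_chunk(struct enc_cursor *c, unsigned char **page)
{
	int len = c->resid < PAGE_SZ ? c->resid : PAGE_SZ;

	*page = c->pages[c->i++];
	c->resid -= len;
	return len;
}
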
CEPH_EPILOGUE_SECURE_LEN : - CEPH_EPILOGUE_PLAIN_LEN); - con->v2.in_state = IN_S_HANDLE_EPILOGUE; - } - return 0; + if (!front_len(msg) && !middle_len(msg) && !data_len(msg)) + return process_message(con); + + if (con_secure(con)) + return prepare_read_tail_secure(con); + + return prepare_read_tail_plain(con); } static int handle_preamble(struct ceph_connection *con) @@ -2717,7 +2819,7 @@ static int handle_epilogue(struct ceph_connection *con) int ret; if (con_secure(con)) { - ret = decrypt_message(con); + ret = decrypt_tail(con); if (ret) { if (ret == -EBADMSG) con->error_msg = "integrity error, bad epilogue auth tag"; @@ -2785,13 +2887,16 @@ static int populate_in_iter(struct ceph_connection *con) ret = handle_control_remainder(con); break; case IN_S_PREPARE_READ_DATA: - prepare_read_data(con); - ret = 0; + ret = prepare_read_data(con); break; case IN_S_PREPARE_READ_DATA_CONT: prepare_read_data_cont(con); ret = 0; break; + case IN_S_PREPARE_READ_ENC_PAGE: + prepare_read_enc_page(con); + ret = 0; + break; case IN_S_HANDLE_EPILOGUE: ret = handle_epilogue(con); break; @@ -3326,20 +3431,16 @@ void ceph_con_v2_revoke(struct ceph_connection *con) static void revoke_at_prepare_read_data(struct ceph_connection *con) { - int remaining; /* data + [data padding] + epilogue */ + int remaining; int resid; + WARN_ON(con_secure(con)); WARN_ON(!data_len(con->in_msg)); WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter)); resid = iov_iter_count(&con->v2.in_iter); WARN_ON(!resid); - if (con_secure(con)) - remaining = padded_len(data_len(con->in_msg)) + - CEPH_EPILOGUE_SECURE_LEN; - else - remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN; - + remaining = data_len(con->in_msg) + CEPH_EPILOGUE_PLAIN_LEN; dout("%s con %p resid %d remaining %d\n", __func__, con, resid, remaining); con->v2.in_iter.count -= resid; @@ -3350,8 +3451,9 @@ static void revoke_at_prepare_read_data(struct ceph_connection *con) static void revoke_at_prepare_read_data_cont(struct ceph_connection *con) { int recved, resid; /* current piece of data */ - int remaining; /* [data padding] + epilogue */ + int remaining; + WARN_ON(con_secure(con)); WARN_ON(!data_len(con->in_msg)); WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter)); resid = iov_iter_count(&con->v2.in_iter); @@ -3363,12 +3465,7 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con) ceph_msg_data_advance(&con->v2.in_cursor, recved); WARN_ON(resid > con->v2.in_cursor.total_resid); - if (con_secure(con)) - remaining = padding_len(data_len(con->in_msg)) + - CEPH_EPILOGUE_SECURE_LEN; - else - remaining = CEPH_EPILOGUE_PLAIN_LEN; - + remaining = CEPH_EPILOGUE_PLAIN_LEN; dout("%s con %p total_resid %zu remaining %d\n", __func__, con, con->v2.in_cursor.total_resid, remaining); con->v2.in_iter.count -= resid; @@ -3376,11 +3473,26 @@ static void revoke_at_prepare_read_data_cont(struct ceph_connection *con) con->v2.in_state = IN_S_FINISH_SKIP; } +static void revoke_at_prepare_read_enc_page(struct ceph_connection *con) +{ + int resid; /* current enc page (not necessarily data) */ + + WARN_ON(!con_secure(con)); + WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter)); + resid = iov_iter_count(&con->v2.in_iter); + WARN_ON(!resid || resid > con->v2.in_bvec.bv_len); + + dout("%s con %p resid %d enc_resid %d\n", __func__, con, resid, + con->v2.in_enc_resid); + con->v2.in_iter.count -= resid; + set_in_skip(con, resid + con->v2.in_enc_resid); + con->v2.in_state = IN_S_FINISH_SKIP; +} + static void revoke_at_handle_epilogue(struct ceph_connection *con) { int resid; - 
WARN_ON(!iov_iter_is_kvec(&con->v2.in_iter)); resid = iov_iter_count(&con->v2.in_iter); WARN_ON(!resid); @@ -3399,6 +3511,9 @@ void ceph_con_v2_revoke_incoming(struct ceph_connection *con) case IN_S_PREPARE_READ_DATA_CONT: revoke_at_prepare_read_data_cont(con); break; + case IN_S_PREPARE_READ_ENC_PAGE: + revoke_at_prepare_read_enc_page(con); + break; case IN_S_HANDLE_EPILOGUE: revoke_at_handle_epilogue(con); break; @@ -3432,6 +3547,13 @@ void ceph_con_v2_reset_protocol(struct ceph_connection *con) clear_out_sign_kvecs(con); free_conn_bufs(con); + if (con->v2.in_enc_pages) { + WARN_ON(!con->v2.in_enc_page_cnt); + ceph_release_page_vector(con->v2.in_enc_pages, + con->v2.in_enc_page_cnt); + con->v2.in_enc_pages = NULL; + con->v2.in_enc_page_cnt = 0; + } if (con->v2.out_enc_pages) { WARN_ON(!con->v2.out_enc_page_cnt); ceph_release_page_vector(con->v2.out_enc_pages, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 213cb7b26b7a6ffce3ccb312690f77fdac49a501..ec0bf737b076ac8f529248ba68d9138766115e47 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1133,7 +1133,8 @@ out: neigh_release(neigh); } -int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) +int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb, + const bool immediate_ok) { int rc; bool immediate_probe = false; @@ -1154,12 +1155,17 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) atomic_set(&neigh->probes, NEIGH_VAR(neigh->parms, UCAST_PROBES)); neigh_del_timer(neigh); - neigh->nud_state = NUD_INCOMPLETE; + neigh->nud_state = NUD_INCOMPLETE; neigh->updated = now; - next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), - HZ/100); + if (!immediate_ok) { + next = now + 1; + } else { + immediate_probe = true; + next = now + max(NEIGH_VAR(neigh->parms, + RETRANS_TIME), + HZ / 100); + } neigh_add_timer(neigh, next); - immediate_probe = true; } else { neigh->nud_state = NUD_FAILED; neigh->updated = jiffies; @@ -1571,7 +1577,7 @@ static void neigh_managed_work(struct work_struct *work) write_lock_bh(&tbl->lock); list_for_each_entry(neigh, &tbl->managed_list, managed_list) - neigh_event_send(neigh, NULL); + neigh_event_send_probe(neigh, NULL, false); queue_delayed_work(system_power_efficient_wq, &tbl->managed_work, NEIGH_VAR(&tbl->parms, DELAY_PROBE_TIME)); write_unlock_bh(&tbl->lock); @@ -3364,7 +3370,7 @@ EXPORT_SYMBOL(neigh_seq_stop); static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) { - struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); + struct neigh_table *tbl = pde_data(file_inode(seq->file)); int cpu; if (*pos == 0) @@ -3381,7 +3387,7 @@ static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos) static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); + struct neigh_table *tbl = pde_data(file_inode(seq->file)); int cpu; for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { @@ -3401,7 +3407,7 @@ static void neigh_stat_seq_stop(struct seq_file *seq, void *v) static int neigh_stat_seq_show(struct seq_file *seq, void *v) { - struct neigh_table *tbl = PDE_DATA(file_inode(seq->file)); + struct neigh_table *tbl = pde_data(file_inode(seq->file)); struct neigh_statistics *st = v; if (v == SEQ_START_TOKEN) { diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index d8b9dbabd4a439137c408fb548ea058b1349c3a1..88cc0ad7d386e825fefb6961db195e9370bfc84d 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -190,12 +190,23 @@ static const struct 
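
The immediate_ok flag threaded through __neigh_event_send() above lets the periodic managed-neighbour work defer the first probe to the timer (next = now + 1) rather than firing one inline for every entry it visits, while ordinary traffic keeps the immediate probe. The timer choice, isolated (jiffies-style units assumed):

#include <stdbool.h>

static unsigned long next_probe_at(unsigned long now, unsigned long retrans,
				   unsigned long hz, bool immediate_ok)
{
	if (!immediate_ok)
		return now + 1;		/* let the timer send the probe */
	/* probe inline now; re-check no sooner than max(RETRANS_TIME, HZ/100) */
	return now + (retrans > hz / 100 ? retrans : hz / 100);
}
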
seq_operations softnet_seq_ops = { .show = softnet_seq_show, }; -static void *ptype_get_idx(loff_t pos) +static void *ptype_get_idx(struct seq_file *seq, loff_t pos) { + struct list_head *ptype_list = NULL; struct packet_type *pt = NULL; + struct net_device *dev; loff_t i = 0; int t; + for_each_netdev_rcu(seq_file_net(seq), dev) { + ptype_list = &dev->ptype_all; + list_for_each_entry_rcu(pt, ptype_list, list) { + if (i == pos) + return pt; + ++i; + } + } + list_for_each_entry_rcu(pt, &ptype_all, list) { if (i == pos) return pt; @@ -216,22 +227,40 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); - return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; + return *pos ? ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net_device *dev; struct packet_type *pt; struct list_head *nxt; int hash; ++*pos; if (v == SEQ_START_TOKEN) - return ptype_get_idx(0); + return ptype_get_idx(seq, 0); pt = v; nxt = pt->list.next; + if (pt->dev) { + if (nxt != &pt->dev->ptype_all) + goto found; + + dev = pt->dev; + for_each_netdev_continue_rcu(seq_file_net(seq), dev) { + if (!list_empty(&dev->ptype_all)) { + nxt = dev->ptype_all.next; + goto found; + } + } + + nxt = ptype_all.next; + goto ptype_all; + } + if (pt->type == htons(ETH_P_ALL)) { +ptype_all: if (nxt != &ptype_all) goto found; hash = 0; @@ -260,7 +289,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "Type Device Function\n"); - else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { + else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && + (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 560a5e712dc32fe4aa2cfaa751c3fe1e462c614f..84b62cd7bc57a96c04622e99c8a48cc5b03d1b66 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -546,7 +546,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf, static int pgctrl_open(struct inode *inode, struct file *file) { - return single_open(file, pgctrl_show, PDE_DATA(inode)); + return single_open(file, pgctrl_show, pde_data(inode)); } static const struct proc_ops pktgen_proc_ops = { @@ -1811,7 +1811,7 @@ static ssize_t pktgen_if_write(struct file *file, static int pktgen_if_open(struct inode *inode, struct file *file) { - return single_open(file, pktgen_if_show, PDE_DATA(inode)); + return single_open(file, pktgen_if_show, pde_data(inode)); } static const struct proc_ops pktgen_if_proc_ops = { @@ -1948,7 +1948,7 @@ out: static int pktgen_thread_open(struct inode *inode, struct file *file) { - return single_open(file, pktgen_thread_show, PDE_DATA(inode)); + return single_open(file, pktgen_thread_show, pde_data(inode)); } static const struct proc_ops pktgen_thread_proc_ops = { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e476403231f00053e1a261f31a8760325c75c941..710da8a36729d6a9293b610b029bd33aa38d6514 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3275,8 +3275,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; unsigned char name_assign_type = NET_NAME_USER; struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; - const struct rtnl_link_ops *m_ops = NULL; - struct net_device *master_dev = NULL; + const struct rtnl_link_ops *m_ops; + struct 
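
ptype_get_idx() above now linearizes several lists behind a single seq_file position: each device's ptype_all first, then the global ptype_all, then the hash buckets. The position-to-element mapping reduces to counting across concatenated lists:

#include <stddef.h>

struct node { struct node *next; };

static struct node *nth_across(struct node *heads[], int nlists, long pos)
{
	long i = 0;

	for (int l = 0; l < nlists; l++)
		for (struct node *n = heads[l]; n; n = n->next)
			if (i++ == pos)
				return n;
	return NULL;	/* past the end: the seq_file iteration stops */
}
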
net_device *master_dev; struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; struct nlattr *tb[IFLA_MAX + 1]; @@ -3314,6 +3314,8 @@ replay: else dev = NULL; + master_dev = NULL; + m_ops = NULL; if (dev) { master_dev = netdev_master_upper_dev_get(dev); if (master_dev) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0118f0afaa4fce8da167ddf39de4c9f3880ca05b..9d0388bed0c1d2166214c95081f4778afe9f50ed 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -681,7 +681,7 @@ exit: * while trying to recycle fragments on __skb_frag_unref() we need * to make one SKB responsible for triggering the recycle path. * So disable the recycling bit if an SKB is cloned and we have - * additional references to to the fragmented part of the SKB. + * additional references to the fragmented part of the SKB. * Eventually the last SKB will have the recycling bit set and it's * dataref set to 0, which will trigger the recycling */ diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 3d21521453feadac58ff23b78214eef7a1671307..dcad3100b164637fd581d70ba20e4b31ae6e8a6b 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -1718,7 +1718,6 @@ EXPORT_SYMBOL_GPL(dsa_unregister_switch); void dsa_switch_shutdown(struct dsa_switch *ds) { struct net_device *master, *slave_dev; - LIST_HEAD(unregister_list); struct dsa_port *dp; mutex_lock(&dsa2_mutex); @@ -1729,25 +1728,13 @@ void dsa_switch_shutdown(struct dsa_switch *ds) slave_dev = dp->slave; netdev_upper_dev_unlink(master, slave_dev); - /* Just unlinking ourselves as uppers of the master is not - * sufficient. When the master net device unregisters, that will - * also call dev_close, which we will catch as NETDEV_GOING_DOWN - * and trigger a dev_close on our own devices (dsa_slave_close). - * In turn, that will call dev_mc_unsync on the master's net - * device. If the master is also a DSA switch port, this will - * trigger dsa_slave_set_rx_mode which will call dev_mc_sync on - * its own master. Lockdep will complain about the fact that - * all cascaded masters have the same dsa_master_addr_list_lock_key, - * which it normally would not do if the cascaded masters would - * be in a proper upper/lower relationship, which we've just - * destroyed. - * To suppress the lockdep warnings, let's actually unregister - * the DSA slave interfaces too, to avoid the nonsensical - * multicast address list synchronization on shutdown. - */ - unregister_netdevice_queue(slave_dev, &unregister_list); } - unregister_netdevice_many(&unregister_list); + + /* Disconnect from further netdevice notifiers on the master, + * since netdev_uses_dsa() will now return false. 
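
The two added NULL assignments in __rtnl_newlink() look trivial but are the whole fix: the function can jump back to its replay: label after a module autoload, and anything derived from the previous pass must be recomputed rather than inherited. The shape of the bug, reduced to a toy (names hypothetical):

#include <stddef.h>

enum { OK = 0, RETRY = 1 };

static int resolve(void **master, void **ops, int attempt)
{
	(void)master; (void)ops;
	return attempt == 0 ? RETRY : OK;	/* first pass asks for a replay */
}

static int with_replay(void)
{
	void *master, *ops;
	int attempt = 0;
	int err;

replay:
	master = NULL;	/* previously initialized once, above the label, and
			 * therefore stale on the second pass; now reset on
			 * every pass */
	ops = NULL;
	err = resolve(&master, &ops, attempt);
	if (err == RETRY && attempt++ == 0)
		goto replay;
	return err;
}

int main(void) { return with_replay(); }
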
+ */ + dsa_switch_for_each_cpu_port(dp, ds) + dp->master->dsa_ptr = NULL; rtnl_unlock(); mutex_unlock(&dsa2_mutex); diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 277124f206e0639716f6bafe6e98bd1c49a14446..e0b072aecf0f30fe5b410ef0418fc01c765474c9 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -1441,7 +1441,7 @@ static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -1634,7 +1634,7 @@ static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -1812,7 +1812,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -1988,7 +1988,7 @@ static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 57c1d8431386272abe79e5e28bd65877e4633c03..139cec29ed06cd092ebdfd2bf0d13aaf67c5359d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -162,12 +162,19 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - if (ip_dont_fragment(sk, &rt->dst)) { + /* Do not bother generating IPID for small packets (eg SYNACK) */ + if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) { iph->frag_off = htons(IP_DF); iph->id = 0; } else { iph->frag_off = 0; - __ip_select_ident(net, iph, 1); + /* TCP packets here are SYNACK with fat IPv4/TCP options. + * Avoid using the hashed IP ident generator. + */ + if (sk->sk_protocol == IPPROTO_TCP) + iph->id = (__force __be16)prandom_u32(); + else + __ip_select_ident(net, iph, 1); } if (opt && opt->opt.optlen) { @@ -825,15 +832,24 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, /* Everything is OK. Generate! */ ip_fraglist_init(skb, iph, hlen, &iter); - if (iter.frag) - ip_options_fragment(iter.frag); - for (;;) { /* Prepare header of the next frame, * before previous one went down. 
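
The ip_build_and_send_pkt() change above encodes a three-way IPID policy: packets that can never be fragmented carry id 0, TCP replies such as SYNACKs with fat options get a random id so the hashed generator's internal state cannot be probed, and everything else keeps the hashed per-destination generator. As a decision table (the two generators are passed in as assumptions):

#include <stdint.h>

#define IPV4_MIN_MTU 68

static uint16_t choose_ipid(unsigned int len, int dont_fragment, int is_tcp,
			    uint16_t (*prandom)(void), uint16_t (*hashed)(void))
{
	if (len <= IPV4_MIN_MTU || dont_fragment)
		return 0;		/* DF set: the id is unused on the wire */
	if (is_tcp)
		return prandom();	/* no shared generator state to leak */
	return hashed();
}
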
*/ if (iter.frag) { + bool first_frag = (iter.offset == 0); + IPCB(iter.frag)->flags = IPCB(skb)->flags; ip_fraglist_prepare(skb, &iter); + if (first_frag && IPCB(skb)->opt.optlen) { + /* ipcb->opt is not populated for frags + * coming from __ip_make_skb(), + * ip_options_fragment() needs optlen + */ + IPCB(iter.frag)->opt.optlen = + IPCB(skb)->opt.optlen; + ip_options_fragment(iter.frag); + ip_send_check(iter.iph); + } } skb->tstamp = tstamp; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 07274619b9ea11837501f8fe812d616d20573ee0..29bbe2b08ae970e5accd7e1b01bcd5f502a66810 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -256,7 +256,9 @@ static int __net_init ipmr_rules_init(struct net *net) return 0; err2: + rtnl_lock(); ipmr_free_table(mrt); + rtnl_unlock(); err1: fib_rules_unregister(ops); return err; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 67087f95579fdea6b7b6d10d279b2cadf32f6165..aab384126f61ffb9a912e7cbd3b16e820cb8a162 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -58,10 +58,6 @@ config NF_TABLES_ARP endif # NF_TABLES -config NF_FLOW_TABLE_IPV4 - tristate - select NF_FLOW_TABLE_INET - config NF_DUP_IPV4 tristate "Netfilter IPv4 packet duplication to alternate destination" depends on !NF_CONNTRACK || NF_CONNTRACK diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index b518f20c9a244df13147ee4835cbe6251ed6026d..f8e176c77d1c1e177bb374379e24fabe2040fc5d 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -776,7 +776,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *sf = file->private_data; - struct clusterip_config *c = PDE_DATA(inode); + struct clusterip_config *c = pde_data(inode); sf->private = c; @@ -788,7 +788,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file) static int clusterip_proc_release(struct inode *inode, struct file *file) { - struct clusterip_config *c = PDE_DATA(inode); + struct clusterip_config *c = pde_data(inode); int ret; ret = seq_release(inode, file); @@ -802,7 +802,7 @@ static int clusterip_proc_release(struct inode *inode, struct file *file) static ssize_t clusterip_proc_write(struct file *file, const char __user *input, size_t size, loff_t *ofs) { - struct clusterip_config *c = PDE_DATA(file_inode(file)); + struct clusterip_config *c = pde_data(file_inode(file)); #define PROC_WRITELEN 10 char buffer[PROC_WRITELEN+1]; unsigned long nodenum; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 0e56df3a45e266771b7bb02e3ce7a920d2b19d29..bcf7bc71cb56c81ff5ae30fbf3b4d4aee5cc7476 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -220,7 +220,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) continue; } - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && + sk->sk_bound_dev_if != inet_sdif(skb)) continue; sock_hold(sk); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index a53f256bf9d39157021f84ce395fa1a0b9fb46ab..9f97b9cbf7b37e41648cbc98fb994adeb63f43cb 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -722,6 +722,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int ret = -EINVAL; int chk_addr_ret; + lock_sock(sk); if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) goto out; @@ -741,7 +742,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_saddr = 0; /* 
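
The ping_lookup() hunk above lets a bound socket match either the receiving device index (dif) or the L3 slave index (sdif) that inet_sdif() reports, instead of dif alone. The predicate on its own:

#include <stdbool.h>

static bool bound_dev_match(int bound_if, int dif, int sdif)
{
	if (!bound_if)
		return true;	/* socket not bound to a device */
	return bound_if == dif || bound_if == sdif;
}
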
Use device */ sk_dst_reset(sk); ret = 0; -out: return ret; +out: + release_sock(sk); + return ret; } /* @@ -971,7 +974,7 @@ struct proto raw_prot = { static struct sock *raw_get_first(struct seq_file *seq) { struct sock *sk; - struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file)); + struct raw_hashinfo *h = pde_data(file_inode(seq->file)); struct raw_iter_state *state = raw_seq_private(seq); for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE; @@ -987,7 +990,7 @@ found: static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) { - struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file)); + struct raw_hashinfo *h = pde_data(file_inode(seq->file)); struct raw_iter_state *state = raw_seq_private(seq); do { @@ -1016,7 +1019,7 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos) void *raw_seq_start(struct seq_file *seq, loff_t *pos) __acquires(&h->lock) { - struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file)); + struct raw_hashinfo *h = pde_data(file_inode(seq->file)); read_lock(&h->lock); return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; @@ -1039,7 +1042,7 @@ EXPORT_SYMBOL_GPL(raw_seq_next); void raw_seq_stop(struct seq_file *seq, void *v) __releases(&h->lock) { - struct raw_hashinfo *h = PDE_DATA(file_inode(seq->file)); + struct raw_hashinfo *h = pde_data(file_inode(seq->file)); read_unlock(&h->lock); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3b75836db19b07b0f178ef4457bda0ec641fd40d..02cb275e5487d98b3e124ee102163aac47b2ad6d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -842,6 +842,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, } release_sock(sk); + sk_defer_free_flush(sk); if (spliced) return spliced; @@ -936,6 +937,22 @@ void tcp_remove_empty_skb(struct sock *sk) } } +/* skb changing from pure zc to mixed, must charge zc */ +static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb) +{ + if (unlikely(skb_zcopy_pure(skb))) { + u32 extra = skb->truesize - + SKB_TRUESIZE(skb_end_offset(skb)); + + if (!sk_wmem_schedule(sk, extra)) + return -ENOMEM; + + sk_mem_charge(sk, extra); + skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY; + } + return 0; +} + static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, struct page *page, int offset, size_t *size) { @@ -971,7 +988,7 @@ new_segment: tcp_mark_push(tp, skb); goto new_segment; } - if (!sk_wmem_schedule(sk, copy)) + if (tcp_downgrade_zcopy_pure(sk, skb) || !sk_wmem_schedule(sk, copy)) return NULL; if (can_coalesce) { @@ -1319,16 +1336,8 @@ new_segment: copy = min_t(int, copy, pfrag->size - pfrag->offset); - /* skb changing from pure zc to mixed, must charge zc */ - if (unlikely(skb_zcopy_pure(skb))) { - if (!sk_wmem_schedule(sk, skb->data_len)) - goto wait_for_space; - - sk_mem_charge(sk, skb->data_len); - skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY; - } - - if (!sk_wmem_schedule(sk, copy)) + if (tcp_downgrade_zcopy_pure(sk, skb) || + !sk_wmem_schedule(sk, copy)) goto wait_for_space; err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index dc49a3d551eb919baf5ad812ef21698c5c7b9679..bfe4112e000c09ba9d7d8b64392f52337b9053e9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1660,6 +1660,8 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, (mss != tcp_skb_seglen(skb))) goto out; + if (!tcp_skb_can_collapse(prev, skb)) + goto out; len = skb->len; pcount = tcp_skb_pcount(skb); if (tcp_skb_shift(prev, skb, pcount, len)) diff 
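
raw_bind() above now holds the socket lock across the entire sequence of checking sk_state, validating the address and writing inet_saddr, so a concurrent operation on the same socket cannot slip in between the check and the update. A pthread rendering of the same shape (error values abbreviated):

#include <pthread.h>

struct rsock {
	pthread_mutex_t lock;	/* lock_sock()/release_sock() stand-in */
	int state;		/* 0 == TCP_CLOSE */
	unsigned int saddr;
};

static int raw_bind_sketch(struct rsock *sk, unsigned int addr)
{
	int ret = -22;	/* -EINVAL */

	pthread_mutex_lock(&sk->lock);
	if (sk->state != 0)
		goto out;
	sk->saddr = addr;	/* check and write form one critical section */
	ret = 0;
out:
	pthread_mutex_unlock(&sk->lock);
	return ret;
}
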
--git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b3f34e366b27f7f1aece164aa485b1e9a7248d93..fec656f5a39ee2f30ffa3a65daea3dee2d068a28 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2095,7 +2095,7 @@ process: nf_reset_ct(skb); if (tcp_filter(sk, skb)) { - drop_reason = SKB_DROP_REASON_TCP_FILTER; + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto discard_and_relse; } th = (const struct tcphdr *)skb->data; @@ -3002,7 +3002,7 @@ static unsigned short seq_file_family(const struct seq_file *seq) #endif /* Iterated from proc fs */ - afinfo = PDE_DATA(file_inode(seq->file)); + afinfo = pde_data(file_inode(seq->file)); return afinfo->family; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 464590ea922e153e3391ce8f9601c0bb63aae4f2..090360939401e60d7844f18b3fd2fb283bdc2b5e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2960,7 +2960,7 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) if (state->bpf_seq_afinfo) afinfo = state->bpf_seq_afinfo; else - afinfo = PDE_DATA(file_inode(seq->file)); + afinfo = pde_data(file_inode(seq->file)); for (state->bucket = start; state->bucket <= afinfo->udp_table->mask; ++state->bucket) { @@ -2993,7 +2993,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) if (state->bpf_seq_afinfo) afinfo = state->bpf_seq_afinfo; else - afinfo = PDE_DATA(file_inode(seq->file)); + afinfo = pde_data(file_inode(seq->file)); do { sk = sk_next(sk); @@ -3050,7 +3050,7 @@ void udp_seq_stop(struct seq_file *seq, void *v) if (state->bpf_seq_afinfo) afinfo = state->bpf_seq_afinfo; else - afinfo = PDE_DATA(file_inode(seq->file)); + afinfo = pde_data(file_inode(seq->file)); if (state->bucket <= afinfo->udp_table->mask) spin_unlock_bh(&afinfo->udp_table->hash[state->bucket].lock); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3eee17790a82fe6c528db4e821b11444cfa26866..f927c199a93c3ed1124be02eb32a20900980f337 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2589,7 +2589,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, __u32 valid_lft, u32 prefered_lft) { struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1); - int create = 0; + int create = 0, update_lft = 0; if (!ifp && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; @@ -2633,19 +2633,32 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, unsigned long now; u32 stored_lft; - /* Update lifetime (RFC4862 5.5.3 e) - * We deviate from RFC4862 by honoring all Valid Lifetimes to - * improve the reaction of SLAAC to renumbering events - * (draft-gont-6man-slaac-renum-06, Section 4.2) - */ + /* update lifetime (RFC2462 5.5.3 e) */ spin_lock_bh(&ifp->lock); now = jiffies; if (ifp->valid_lft > (now - ifp->tstamp) / HZ) stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; else stored_lft = 0; - if (!create && stored_lft) { + const u32 minimum_lft = min_t(u32, + stored_lft, MIN_VALID_LIFETIME); + valid_lft = max(valid_lft, minimum_lft); + + /* RFC4862 Section 5.5.3e: + * "Note that the preferred lifetime of the + * corresponding address is always reset to + * the Preferred Lifetime in the received + * Prefix Information option, regardless of + * whether the valid lifetime is also reset or + * ignored." + * + * So we should always update prefered_lft here. 
+ */ + update_lft = 1; + } + + if (update_lft) { ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; ifp->tstamp = now; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 463c37dea449749ac0feedeb731b628b75133bba..413f66781e50de62d6b20042d84798e7da59165a 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -112,7 +112,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i) fn = rcu_dereference_protected(f6i->fib6_node, lockdep_is_held(&f6i->fib6_table->tb6_lock)); if (fn) - fn->fn_sernum = fib6_new_sernum(net); + WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net)); } /* @@ -590,12 +590,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, spin_unlock_bh(&table->tb6_lock); if (res > 0) { cb->args[4] = 1; - cb->args[5] = w->root->fn_sernum; + cb->args[5] = READ_ONCE(w->root->fn_sernum); } } else { - if (cb->args[5] != w->root->fn_sernum) { + int sernum = READ_ONCE(w->root->fn_sernum); + if (cb->args[5] != sernum) { /* Begin at the root if the tree changed */ - cb->args[5] = w->root->fn_sernum; + cb->args[5] = sernum; w->state = FWS_INIT; w->node = w->root; w->skip = w->count; @@ -1345,7 +1346,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt, /* paired with smp_rmb() in fib6_get_cookie_safe() */ smp_wmb(); while (fn) { - fn->fn_sernum = sernum; + WRITE_ONCE(fn->fn_sernum, sernum); fn = rcu_dereference_protected(fn->parent, lockdep_is_held(&rt->fib6_table->tb6_lock)); } @@ -2174,8 +2175,8 @@ static int fib6_clean_node(struct fib6_walker *w) }; if (c->sernum != FIB6_NO_SERNUM_CHANGE && - w->node->fn_sernum != c->sernum) - w->node->fn_sernum = c->sernum; + READ_ONCE(w->node->fn_sernum) != c->sernum) + WRITE_ONCE(w->node->fn_sernum, c->sernum); if (!c->func) { WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE); @@ -2543,7 +2544,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter, iter->w.state = FWS_INIT; iter->w.node = iter->w.root; iter->w.args = iter; - iter->sernum = iter->w.root->fn_sernum; + iter->sernum = READ_ONCE(iter->w.root->fn_sernum); INIT_LIST_HEAD(&iter->w.lh); fib6_walker_link(net, &iter->w); } @@ -2571,8 +2572,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) { - if (iter->sernum != iter->w.root->fn_sernum) { - iter->sernum = iter->w.root->fn_sernum; + int sernum = READ_ONCE(iter->w.root->fn_sernum); + + if (iter->sernum != sernum) { + iter->sernum = sernum; iter->w.state = FWS_INIT; iter->w.node = iter->w.root; WARN_ON(iter->w.skip); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index fe786df4f8493396b803002d25701affd59ee96c..97ade833f58cc29f3fb3b681b017526da76b72da 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1036,14 +1036,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Local address not yet configured!\n", - p->name); + pr_warn_ratelimited("%s xmit: Local address not yet configured!\n", + p->name); else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) && !ipv6_addr_is_multicast(raddr) && unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev, true, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", - p->name); + pr_warn_ratelimited("%s xmit: Routing loop! 
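
The addrconf change above restores the RFC4862 5.5.3e two-hour rule: a received Valid Lifetime may only lower the stored remaining lifetime down to a floor of min(stored, MIN_VALID_LIFETIME), which stops a spoofed Router Advertisement from expiring addresses instantly. The clamp, with worked cases:

#include <stdint.h>

#define MIN_VALID_LIFETIME (2 * 60 * 60)	/* two hours, in seconds */

static uint32_t clamp_valid_lft(uint32_t advertised, uint32_t stored)
{
	uint32_t floor = stored < MIN_VALID_LIFETIME ? stored
						     : MIN_VALID_LIFETIME;

	return advertised > floor ? advertised : floor;
}

/* stored = 86400s, advertised = 10s -> 7200s (floored, not expired)
 * stored = 600s,   advertised = 10s -> 600s  (keeps the remaining lifetime)
 */
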
Remote address found on this node!\n", + p->name); else ret = 1; rcu_read_unlock(); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 7cf73e60e619ba92ea1eccc90c181ba7150225dd..8a2db926b5eb6ab2e08691ad7244c504611540e6 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -243,7 +243,9 @@ static int __net_init ip6mr_rules_init(struct net *net) return 0; err2: + rtnl_lock(); ip6mr_free_table(mrt); + rtnl_unlock(); err1: fib_rules_unregister(ops); return err; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 97d3d1b36dbceb602f5c0b4029dddae57c5039c2..0ba62f4868f97695bdcc60ae58796c4f7c434dcc 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -47,10 +47,6 @@ config NFT_FIB_IPV6 endif # NF_TABLES_IPV6 endif # NF_TABLES -config NF_FLOW_TABLE_IPV6 - tristate - select NF_FLOW_TABLE_INET - config NF_DUP_IPV6 tristate "Netfilter IPv6 packet duplication to alternate destination" depends on !NF_CONNTRACK || NF_CONNTRACK diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index b85383606df71b5c0a83bacdd345b73e761c5a7a..b8d6dc9aeeb6f403e8f2553b8b182d47eae49074 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -28,9 +28,6 @@ obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o -# flow table support -obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o - # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e6de94203c130c0a6fc6abca623790738df1e4fa..f4884cda13b92e72d041680cbabfee2e07ec0f10 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2802,7 +2802,7 @@ static void ip6_link_failure(struct sk_buff *skb) if (from) { fn = rcu_dereference(from->fib6_node); if (fn && (rt->rt6i_flags & RTF_DEFAULT)) - fn->fn_sernum = -1; + WRITE_ONCE(fn->fn_sernum, -1); } } rcu_read_unlock(); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 48f75a56f4ae7d5c0836198c3f02789dcc76e74e..d6fdc5782d332c4a7791fa1aa025a2e7e6ffcddd 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1607,6 +1607,7 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct mpls_dev *mdev; unsigned int flags; + int err; if (event == NETDEV_REGISTER) { mdev = mpls_add_dev(dev); @@ -1621,7 +1622,6 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, return NOTIFY_OK; switch (event) { - int err; case NETDEV_DOWN: err = mpls_ifdown(dev, event); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 75af1f701e1d1fe93be901a569dd820d1ad01a9a..356f596e2032306933cbcd17b3838800fb25ccce 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -478,6 +478,20 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id) return NULL; } +static struct mptcp_pm_addr_entry * +__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info, + bool lookup_by_id) +{ + struct mptcp_pm_addr_entry *entry; + + list_for_each_entry(entry, &pernet->local_addr_list, list) { + if ((!lookup_by_id && addresses_equal(&entry->addr, info, true)) || + (lookup_by_id && entry->addr.id == info->id)) + return entry; 
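
The fn_sernum conversions in ip6_fib.c above, and the matching WRITE_ONCE() in route.c, follow one pattern: a field written under the table lock but read locklessly gets WRITE_ONCE() on every store and READ_ONCE() on every lockless load, so the compiler can neither tear the access nor silently re-read it. The closest userspace equivalent is a relaxed C11 atomic:

#include <stdatomic.h>

static _Atomic int sernum;	/* plays the role of fn->fn_sernum */

static void bump(int v)		/* like WRITE_ONCE(fn->fn_sernum, v) */
{
	atomic_store_explicit(&sernum, v, memory_order_relaxed);
}

static int snapshot(void)	/* like READ_ONCE(fn->fn_sernum) */
{
	return atomic_load_explicit(&sernum, memory_order_relaxed);
}

/* Readers take one snapshot and compare it to their cached value, exactly
 * like fib6_dump_table() comparing cb->args[5] against the root sernum. */
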
+ } + return NULL; +} + static int lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr) { @@ -777,7 +791,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, removed = true; __MPTCP_INC_STATS(sock_net(sk), rm_type); } - __set_bit(rm_list->ids[1], msk->pm.id_avail_bitmap); + __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap); if (!removed) continue; @@ -911,6 +925,7 @@ out: static int mptcp_pm_nl_create_listen_socket(struct sock *sk, struct mptcp_pm_addr_entry *entry) { + int addrlen = sizeof(struct sockaddr_in); struct sockaddr_storage addr; struct mptcp_sock *msk; struct socket *ssock; @@ -935,8 +950,11 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk, } mptcp_info2sockaddr(&entry->addr, &addr, entry->addr.family); - err = kernel_bind(ssock, (struct sockaddr *)&addr, - sizeof(struct sockaddr_in)); +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + if (entry->addr.family == AF_INET6) + addrlen = sizeof(struct sockaddr_in6); +#endif + err = kernel_bind(ssock, (struct sockaddr *)&addr, addrlen); if (err) { pr_warn("kernel_bind error, err=%d", err); goto out; @@ -1763,18 +1781,21 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) return -EOPNOTSUPP; } - list_for_each_entry(entry, &pernet->local_addr_list, list) { - if ((!lookup_by_id && addresses_equal(&entry->addr, &addr.addr, true)) || - (lookup_by_id && entry->addr.id == addr.addr.id)) { - mptcp_nl_addr_backup(net, &entry->addr, bkup); - - if (bkup) - entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; - else - entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; - } + spin_lock_bh(&pernet->lock); + entry = __lookup_addr(pernet, &addr.addr, lookup_by_id); + if (!entry) { + spin_unlock_bh(&pernet->lock); + return -EINVAL; } + if (bkup) + entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else + entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; + addr = *entry; + spin_unlock_bh(&pernet->lock); + + mptcp_nl_addr_backup(net, &addr.addr, bkup); return 0; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 0e6b42c76ea072ff6dab000e4ac27f1571b35b34..85317ce38e3fa2d8e53fde8790c52a6ba8638d2f 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -408,7 +408,7 @@ DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); struct mptcp_subflow_context { struct list_head node;/* conn_list of subflows */ - char reset_start[0]; + struct_group(reset, unsigned long avg_pacing_rate; /* protected by msk socket lock */ u64 local_key; @@ -458,7 +458,7 @@ struct mptcp_subflow_context { long delegated_status; - char reset_end[0]; + ); struct list_head delegated_node; /* link into delegated_action, protected by local BH */ @@ -494,7 +494,7 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) static inline void mptcp_subflow_ctx_reset(struct mptcp_subflow_context *subflow) { - memset(subflow->reset_start, 0, subflow->reset_end - subflow->reset_start); + memset(&subflow->reset, 0, sizeof(subflow->reset)); subflow->request_mptcp = 1; } diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 7121ce2a47c0bb12658177a408af5d2a0739d0ba..78814417d753fc99950a0ca3d5e0a6310c4aff36 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -608,7 +608,7 @@ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, bitmap = &ncf->bitmap; spin_lock_irqsave(&nc->lock, flags); - index = find_next_bit(bitmap, ncf->n_vids, 0); + index = find_first_bit(bitmap, ncf->n_vids); if (index >= ncf->n_vids) { spin_unlock_irqrestore(&nc->lock, flags); 
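
The mptcp hunk above swaps the old reset_start[0]/reset_end[0] marker pair for struct_group() from include/linux/stddef.h: the members become a union of an anonymous struct and an identically laid-out named one, so memset(&subflow->reset, 0, sizeof(subflow->reset)) gets a size the compiler and FORTIFY_SOURCE can check. A reduced, kernel-independent version of the idea:

#include <string.h>

#define my_struct_group(NAME, ...)		\
	union {					\
		struct { __VA_ARGS__ };		\
		struct { __VA_ARGS__ } NAME;	\
	}

struct ctx {
	int keep_me;		/* survives a reset */
	my_struct_group(reset,
		long key;
		int flags;
	);
};

static void ctx_reset(struct ctx *c)
{
	/* bounded by the group's size: no pointer-difference arithmetic */
	memset(&c->reset, 0, sizeof(c->reset));
}
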
return -1; @@ -667,7 +667,7 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, return -1; } - index = find_next_zero_bit(bitmap, ncf->n_vids, 0); + index = find_first_zero_bit(bitmap, ncf->n_vids); if (index < 0 || index >= ncf->n_vids) { netdev_err(ndp->ndev.dev, "Channel %u already has all VLAN filters set\n", diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 894a325d39f2028ed899feec8cb9e4bd2199eca7..d6aa5b47031ebcb4a35abbd01f48fc46b746c8ab 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1924,15 +1924,17 @@ repeat: pr_debug("nf_conntrack_in: Can't track with proto module\n"); nf_ct_put(ct); skb->_nfct = 0; - NF_CT_STAT_INC_ATOMIC(state->net, invalid); - if (ret == -NF_DROP) - NF_CT_STAT_INC_ATOMIC(state->net, drop); /* Special case: TCP tracker reports an attempt to reopen a * closed/aborted connection. We have to go back and create a * fresh conntrack. */ if (ret == -NF_REPEAT) goto repeat; + + NF_CT_STAT_INC_ATOMIC(state->net, invalid); + if (ret == -NF_DROP) + NF_CT_STAT_INC_ATOMIC(state->net, drop); + ret = -ret; goto out; } diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 7f19ee25960905efed2c95db5402309199a99027..55415f011943dadf3ffdd14cc1df473d8bf17369 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -20,13 +20,14 @@ #include #include +#define HELPER_NAME "netbios-ns" #define NMBD_PORT 137 MODULE_AUTHOR("Patrick McHardy "); MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_conntrack_netbios_ns"); -MODULE_ALIAS_NFCT_HELPER("netbios_ns"); +MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); static unsigned int timeout __read_mostly = 3; module_param(timeout, uint, 0400); @@ -44,7 +45,7 @@ static int netbios_ns_help(struct sk_buff *skb, unsigned int protoff, } static struct nf_conntrack_helper helper __read_mostly = { - .name = "netbios-ns", + .name = HELPER_NAME, .tuple.src.l3num = NFPROTO_IPV4, .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT), .tuple.dst.protonum = IPPROTO_UDP, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ac438370f94a9240bf1242bf54e36a66fdb6970c..7032402ffd33137179738e71aeaae72f2579c7f5 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2311,7 +2311,8 @@ ctnetlink_create_conntrack(struct net *net, if (helper->from_nlattr) helper->from_nlattr(helpinfo, ct); - /* not in hash table yet so not strictly necessary */ + /* disable helper auto-assignment for this entry */ + ct->status |= IPS_HELPER; RCU_INIT_POINTER(help->helper, helper); } } else { diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 2394238d01c9176e5b3f018c53132ae4889b8a2e..5a936334b517adbd56956d6740e8128aba01dd30 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -489,6 +489,15 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); ct->proto.sctp.vtag[!dir] = ih->init_tag; + + /* don't renew timeout on init retransmit so + * port reuse by client or NAT middlebox cannot + * keep entry alive indefinitely (incl. nat info). 
+ */ + if (new_state == SCTP_CONNTRACK_CLOSED && + old_state == SCTP_CONNTRACK_CLOSED && + nf_ct_is_confirmed(ct)) + ignore = true; } ct->proto.sctp.state = new_state; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index af5115e127cfd75e09ba8dc9e1bf70719c8dc645..d1582b888c0d87780895f8da7162471019547451 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -446,6 +446,32 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, } } +static void tcp_init_sender(struct ip_ct_tcp_state *sender, + struct ip_ct_tcp_state *receiver, + const struct sk_buff *skb, + unsigned int dataoff, + const struct tcphdr *tcph, + u32 end, u32 win) +{ + /* SYN-ACK in reply to a SYN + * or SYN from reply direction in simultaneous open. + */ + sender->td_end = + sender->td_maxend = end; + sender->td_maxwin = (win == 0 ? 1 : win); + + tcp_options(skb, dataoff, tcph, sender); + /* RFC 1323: + * Both sides must send the Window Scale option + * to enable window scaling in either direction. + */ + if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && + receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) { + sender->td_scale = 0; + receiver->td_scale = 0; + } +} + static bool tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, unsigned int index, @@ -499,24 +525,9 @@ static bool tcp_in_window(struct nf_conn *ct, * Initialize sender data. */ if (tcph->syn) { - /* - * SYN-ACK in reply to a SYN - * or SYN from reply direction in simultaneous open. - */ - sender->td_end = - sender->td_maxend = end; - sender->td_maxwin = (win == 0 ? 1 : win); - - tcp_options(skb, dataoff, tcph, sender); - /* - * RFC 1323: - * Both sides must send the Window Scale option - * to enable window scaling in either direction. - */ - if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE - && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) - sender->td_scale = - receiver->td_scale = 0; + tcp_init_sender(sender, receiver, + skb, dataoff, tcph, + end, win); if (!tcph->ack) /* Simultaneous open */ return true; @@ -560,6 +571,18 @@ static bool tcp_in_window(struct nf_conn *ct, sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); + } else if (tcph->syn && dir == IP_CT_DIR_REPLY && + state->state == TCP_CONNTRACK_SYN_SENT) { + /* Retransmitted syn-ack, or syn (simultaneous open). + * + * Re-init state for this direction, just like for the first + * syn(-ack) reply, it might differ in seq, ack or tcp options. + */ + tcp_init_sender(sender, receiver, + skb, dataoff, tcph, + end, win); + if (!tcph->ack) + return true; } if (!(tcph->ack)) { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 77938b1042f3fa5980cdcdc4f32ca52cc075b96f..5fa16990da95177791dfaa9e7bb31c92f3cae096 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2011,7 +2011,6 @@ static void nft_last_rule(struct nft_rule_blob *blob, const void *ptr) prule = (struct nft_rule_dp *)ptr; prule->is_last = 1; - ptr += offsetof(struct nft_rule_dp, data); /* blob size does not include the trailer rule */ } @@ -8264,14 +8263,12 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha void *data, *data_boundary; struct nft_rule_dp *prule; struct nft_rule *rule; - int i; /* already handled or inactive chain? 
*/ if (chain->blob_next || !nft_is_active_next(net, chain)) return 0; rule = list_entry(&chain->rules, struct nft_rule, list); - i = 0; data_size = 0; list_for_each_entry_continue(rule, &chain->rules, list) { @@ -8301,7 +8298,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha return -ENOMEM; size = 0; - track.last = last; + track.last = nft_expr_last(rule); nft_rule_for_each_expr(expr, last, rule) { track.cur = expr; diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index 9d5947ab8d4ef33531dd82fa42057d24273f1260..e646e9ee4a98754136681139010341d255829564 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -167,12 +167,24 @@ nla_put_failure: return -1; } +static bool nft_byteorder_reduce(struct nft_regs_track *track, + const struct nft_expr *expr) +{ + struct nft_byteorder *priv = nft_expr_priv(expr); + + track->regs[priv->dreg].selector = NULL; + track->regs[priv->dreg].bitwise = NULL; + + return false; +} + static const struct nft_expr_ops nft_byteorder_ops = { .type = &nft_byteorder_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), .eval = nft_byteorder_eval, .init = nft_byteorder_init, .dump = nft_byteorder_dump, + .reduce = nft_byteorder_reduce, }; struct nft_expr_type nft_byteorder_type __read_mostly = { diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 7d00a1452b1d29ae993fc059d356cfec0e04bf50..3362417ebfdb7a2f1ff305ae36af49d039b72dd4 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -62,6 +62,7 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx, { bool invert = false; u32 flags, limit; + int err; if (!tb[NFTA_CONNLIMIT_COUNT]) return -EINVAL; @@ -84,7 +85,15 @@ static int nft_connlimit_do_init(const struct nft_ctx *ctx, priv->limit = limit; priv->invert = invert; - return nf_ct_netns_get(ctx->net, ctx->family); + err = nf_ct_netns_get(ctx->net, ctx->family); + if (err < 0) + goto err_netns; + + return 0; +err_netns: + kfree(priv->list); + + return err; } static void nft_connlimit_do_destroy(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 518d96c8c247143729f5aa161e49e5574716b2cd..5adf8bb628a80eadc073a2a064910b15b2ae2b1c 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -260,9 +260,12 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr, ct = this_cpu_read(nft_ct_pcpu_template); if (likely(refcount_read(&ct->ct_general.use) == 1)) { + refcount_inc(&ct->ct_general.use); nf_ct_zone_add(ct, &zone); } else { - /* previous skb got queued to userspace */ + /* previous skb got queued to userspace, allocate temporary + * one until percpu template can be reused. 
+ */ ct = nf_ct_tmpl_alloc(nft_net(pkt), &zone, GFP_ATOMIC); if (!ct) { regs->verdict.code = NF_DROP; diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index dbe1f2e7dd9ed620b1ab0c99c214178e6dbc6a16..9e927ab4df151095612e6878ca571ddf8c5f0b6d 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -167,7 +167,7 @@ nft_tcp_header_pointer(const struct nft_pktinfo *pkt, { struct tcphdr *tcph; - if (pkt->tprot != IPPROTO_TCP) + if (pkt->tprot != IPPROTO_TCP || pkt->fragoff) return NULL; tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 940fed9a760b3f987d5558af5293a527bc333696..5cc06aef43452a8dcd30bb38e4dd6b0865566ed7 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -83,7 +83,7 @@ static int __nft_payload_inner_offset(struct nft_pktinfo *pkt) { unsigned int thoff = nft_thoff(pkt); - if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) return -1; switch (pkt->tprot) { @@ -147,7 +147,7 @@ void nft_payload_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: - if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) goto err; offset = nft_thoff(pkt); break; @@ -688,7 +688,7 @@ static void nft_payload_set_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: - if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) + if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) goto err; offset = nft_thoff(pkt); break; @@ -728,7 +728,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr, if (priv->csum_type == NFT_PAYLOAD_CSUM_SCTP && pkt->tprot == IPPROTO_SCTP && skb->ip_summed != CHECKSUM_PARTIAL) { - if (nft_payload_csum_sctp(skb, nft_thoff(pkt))) + if (pkt->fragoff == 0 && + nft_payload_csum_sctp(skb, nft_thoff(pkt))) goto err; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 25524e3933496098f68748286e56839454ee842c..54a489f16b171220aaffef3ea43355fd89cd91ce 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1517,7 +1517,7 @@ EXPORT_SYMBOL_GPL(xt_unregister_table); #ifdef CONFIG_PROC_FS static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos) { - u8 af = (unsigned long)PDE_DATA(file_inode(seq->file)); + u8 af = (unsigned long)pde_data(file_inode(seq->file)); struct net *net = seq_file_net(seq); struct xt_pernet *xt_net; @@ -1529,7 +1529,7 @@ static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos) static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - u8 af = (unsigned long)PDE_DATA(file_inode(seq->file)); + u8 af = (unsigned long)pde_data(file_inode(seq->file)); struct net *net = seq_file_net(seq); struct xt_pernet *xt_net; @@ -1540,7 +1540,7 @@ static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void xt_table_seq_stop(struct seq_file *seq, void *v) { - u_int8_t af = (unsigned long)PDE_DATA(file_inode(seq->file)); + u_int8_t af = (unsigned long)pde_data(file_inode(seq->file)); mutex_unlock(&xt[af].mutex); } @@ -1584,7 +1584,7 @@ static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos, [MTTG_TRAV_NFP_UNSPEC] = MTTG_TRAV_NFP_SPEC, [MTTG_TRAV_NFP_SPEC] = MTTG_TRAV_DONE, }; - uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file)); + uint8_t nfproto = (unsigned 
long)pde_data(file_inode(seq->file)); struct nf_mttg_trav *trav = seq->private; if (ppos != NULL) @@ -1633,7 +1633,7 @@ static void *xt_mttg_seq_start(struct seq_file *seq, loff_t *pos, static void xt_mttg_seq_stop(struct seq_file *seq, void *v) { - uint8_t nfproto = (unsigned long)PDE_DATA(file_inode(seq->file)); + uint8_t nfproto = (unsigned long)pde_data(file_inode(seq->file)); struct nf_mttg_trav *trav = seq->private; switch (trav->class) { diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 9c5cfd74a0ee488e9e996c8892cf9e08ddcdc1c3..0859b8f767645c7562f1688850e73a199e5608aa 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -1052,7 +1052,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { static void *dl_seq_start(struct seq_file *s, loff_t *pos) __acquires(htable->lock) { - struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file)); + struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file)); unsigned int *bucket; spin_lock_bh(&htable->lock); @@ -1069,7 +1069,7 @@ static void *dl_seq_start(struct seq_file *s, loff_t *pos) static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos) { - struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file)); + struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file)); unsigned int *bucket = v; *pos = ++(*bucket); @@ -1083,7 +1083,7 @@ static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos) static void dl_seq_stop(struct seq_file *s, void *v) __releases(htable->lock) { - struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file)); + struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file)); unsigned int *bucket = v; if (!IS_ERR(bucket)) @@ -1125,7 +1125,7 @@ static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family, static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { - struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file)); + struct xt_hashlimit_htable *ht = pde_data(file_inode(s->file)); spin_lock(&ent->lock); /* recalculate to show accurate numbers */ @@ -1140,7 +1140,7 @@ static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family, static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { - struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file)); + struct xt_hashlimit_htable *ht = pde_data(file_inode(s->file)); spin_lock(&ent->lock); /* recalculate to show accurate numbers */ @@ -1155,7 +1155,7 @@ static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { - struct xt_hashlimit_htable *ht = PDE_DATA(file_inode(s->file)); + struct xt_hashlimit_htable *ht = pde_data(file_inode(s->file)); spin_lock(&ent->lock); /* recalculate to show accurate numbers */ @@ -1169,7 +1169,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, static int dl_seq_show_v2(struct seq_file *s, void *v) { - struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file)); + struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file)); unsigned int *bucket = (unsigned int *)v; struct dsthash_ent *ent; @@ -1183,7 +1183,7 @@ static int dl_seq_show_v2(struct seq_file *s, void *v) static int dl_seq_show_v1(struct seq_file *s, void *v) { - struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file)); + struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file)); unsigned int *bucket = v; 
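/*
 * Editorial note -- illustrative sketch, not part of this patch.
 * The PDE_DATA() -> pde_data() hunks in this series are a pure rename
 * of the helper that returns the 'data' cookie registered for a /proc
 * entry (e.g. via proc_create_data()). Kernel context assumed; the
 * my_stats/my_show/my_open names are invented for the example.
 */
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

struct my_stats {
	unsigned long hits;
};

static int my_show(struct seq_file *m, void *v)
{
	struct my_stats *stats = m->private;	/* forwarded by single_open() */

	seq_printf(m, "hits=%lu\n", stats->hits);
	return 0;
}

static int my_open(struct inode *inode, struct file *file)
{
	/* pde_data(inode) returns the cookie passed to proc_create_data() */
	return single_open(file, my_show, pde_data(inode));
}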
struct dsthash_ent *ent; @@ -1197,7 +1197,7 @@ static int dl_seq_show_v1(struct seq_file *s, void *v) static int dl_seq_show(struct seq_file *s, void *v) { - struct xt_hashlimit_htable *htable = PDE_DATA(file_inode(s->file)); + struct xt_hashlimit_htable *htable = pde_data(file_inode(s->file)); unsigned int *bucket = v; struct dsthash_ent *ent; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 0446307516cdff3b9f219808b95a8a241a82e29b..7ddb9a78e3fc888d3ee4c214870ae8c84171efe7 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -551,7 +551,7 @@ static int recent_seq_open(struct inode *inode, struct file *file) if (st == NULL) return -ENOMEM; - st->table = PDE_DATA(inode); + st->table = pde_data(inode); return 0; } @@ -559,7 +559,7 @@ static ssize_t recent_mt_proc_write(struct file *file, const char __user *input, size_t size, loff_t *loff) { - struct recent_table *t = PDE_DATA(file_inode(file)); + struct recent_table *t = pde_data(file_inode(file)); struct recent_entry *e; char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")]; const char *c = buf; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5bd409ab4cc2001f2ac2d045e77f96c8bbba956a..ab87f22cc7ecde517ba4cd0b3804a28c3cccfc85 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1774,6 +1774,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; + match->prot_hook.af_packet_net = read_pnet(&match->net); match->prot_hook.id_match = match_fanout_group; match->max_num_members = args->max_num_members; list_add(&match->list, &fanout_list); @@ -1788,7 +1789,10 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) err = -ENOSPC; if (refcount_read(&match->sk_ref) < match->max_num_members) { __dev_remove_pack(&po->prot_hook); - po->fanout = match; + + /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ + WRITE_ONCE(po->fanout, match); + po->rollover = rollover; rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); @@ -3353,6 +3357,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po->prot_hook.func = packet_rcv_spkt; po->prot_hook.af_packet_priv = sk; + po->prot_hook.af_packet_net = sock_net(sk); if (proto) { po->prot_hook.type = proto; @@ -3932,7 +3937,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, } case PACKET_FANOUT_DATA: { - if (!po->fanout) + /* Paired with the WRITE_ONCE() in fanout_add() */ + if (!READ_ONCE(po->fanout)) return -EINVAL; return fanout_set_data(po, optval, optlen); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 6be2672a65eabebd5b8fc28d82ed9b3c58542005..df864e6922679140d5b7c8c3aadd65a92906e9e3 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -157,7 +157,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) { struct sk_buff *skb; - unsigned long resend_at, rto_j; + unsigned long resend_at; rxrpc_seq_t cursor, seq, top; ktime_t now, max_age, oldest, ack_ts; int ix; @@ -165,10 +165,8 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); - rto_j = call->peer->rto_j; - now = ktime_get_real(); - max_age = ktime_sub(now, jiffies_to_usecs(rto_j)); + max_age = ktime_sub(now, 
jiffies_to_usecs(call->peer->rto_j)); spin_lock_bh(&call->lock); @@ -213,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) } resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest))); - resend_at += jiffies + rto_j; + resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans); WRITE_ONCE(call->resend_at, resend_at); if (unacked) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 10f2bf2e9068abc15ef4ece90e7d6652194b4f77..a45c83f22236e2648c2fa4f97ebebb088361a071 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -468,7 +468,7 @@ done: if (call->peer->rtt_count > 1) { unsigned long nowj = jiffies, ack_lost_at; - ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans); + ack_lost_at = rxrpc_get_rto_backoff(call->peer, false); ack_lost_at += nowj; WRITE_ONCE(call->ack_lost_at, ack_lost_at); rxrpc_reduce_call_timer(call, ack_lost_at, nowj, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index d4e27c679123f0f62d033c3ad0eeda838e83cb49..5f0f346b576fc0f52a5b8de21758af5425285c2c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1945,9 +1945,9 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, bool prio_allocate; u32 parent; u32 chain_index; - struct Qdisc *q = NULL; + struct Qdisc *q; struct tcf_chain_info chain_info; - struct tcf_chain *chain = NULL; + struct tcf_chain *chain; struct tcf_block *block; struct tcf_proto *tp; unsigned long cl; @@ -1976,6 +1976,8 @@ replay: tp = NULL; cl = 0; block = NULL; + q = NULL; + chain = NULL; flags = 0; if (prio == 0) { @@ -2798,8 +2800,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, struct tcmsg *t; u32 parent; u32 chain_index; - struct Qdisc *q = NULL; - struct tcf_chain *chain = NULL; + struct Qdisc *q; + struct tcf_chain *chain; struct tcf_block *block; unsigned long cl; int err; @@ -2809,6 +2811,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, return -EPERM; replay: + q = NULL; err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2cb496c8487880d0becf4d81b2352bf579ef1fb0..179825a3b2fdb52a5d13bfa529cf494aa6c9e7fb 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1204,7 +1204,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, err = -ENOENT; if (!ops) { - NL_SET_ERR_MSG(extack, "Specified qdisc not found"); + NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown"); goto err_out; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 9267922ea9c375dc555e8c17f9d9e9a90721330a..23a9d6242429f1f9fadd047602108daffe3708ea 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1810,6 +1810,26 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!hopt->rate.rate || !hopt->ceil.rate) goto failure; + if (q->offload) { + /* Options not supported by the offload. 
*/ + if (hopt->rate.overhead || hopt->ceil.overhead) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the overhead parameter"); + goto failure; + } + if (hopt->rate.mpu || hopt->ceil.mpu) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the mpu parameter"); + goto failure; + } + if (hopt->quantum) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter"); + goto failure; + } + if (hopt->prio) { + NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter"); + goto failure; + } + } + /* Keeping backward compatible with rate_table based iproute2 tc */ if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB], diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 961854e56736642cbec3d323296d26fd905aff78..8c89d0b0ca185746e0f288525cb91a13734445bc 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -566,11 +566,114 @@ static void smc_stat_fallback(struct smc_sock *smc) mutex_unlock(&net->smc.mutex_fback_rsn); } -static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code) +/* must be called under rcu read lock */ +static void smc_fback_wakeup_waitqueue(struct smc_sock *smc, void *key) { - wait_queue_head_t *smc_wait = sk_sleep(&smc->sk); - wait_queue_head_t *clc_wait = sk_sleep(smc->clcsock->sk); - unsigned long flags; + struct socket_wq *wq; + __poll_t flags; + + wq = rcu_dereference(smc->sk.sk_wq); + if (!skwq_has_sleeper(wq)) + return; + + /* wake up smc sk->sk_wq */ + if (!key) { + /* sk_state_change */ + wake_up_interruptible_all(&wq->wait); + } else { + flags = key_to_poll(key); + if (flags & (EPOLLIN | EPOLLOUT)) + /* sk_data_ready or sk_write_space */ + wake_up_interruptible_sync_poll(&wq->wait, flags); + else if (flags & EPOLLERR) + /* sk_error_report */ + wake_up_interruptible_poll(&wq->wait, flags); + } +} + +static int smc_fback_mark_woken(wait_queue_entry_t *wait, + unsigned int mode, int sync, void *key) +{ + struct smc_mark_woken *mark = + container_of(wait, struct smc_mark_woken, wait_entry); + + mark->woken = true; + mark->key = key; + return 0; +} + +static void smc_fback_forward_wakeup(struct smc_sock *smc, struct sock *clcsk, + void (*clcsock_callback)(struct sock *sk)) +{ + struct smc_mark_woken mark = { .woken = false }; + struct socket_wq *wq; + + init_waitqueue_func_entry(&mark.wait_entry, + smc_fback_mark_woken); + rcu_read_lock(); + wq = rcu_dereference(clcsk->sk_wq); + if (!wq) + goto out; + add_wait_queue(sk_sleep(clcsk), &mark.wait_entry); + clcsock_callback(clcsk); + remove_wait_queue(sk_sleep(clcsk), &mark.wait_entry); + + if (mark.woken) + smc_fback_wakeup_waitqueue(smc, mark.key); +out: + rcu_read_unlock(); +} + +static void smc_fback_state_change(struct sock *clcsk) +{ + struct smc_sock *smc = + smc_clcsock_user_data(clcsk); + + if (!smc) + return; + smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_state_change); +} + +static void smc_fback_data_ready(struct sock *clcsk) +{ + struct smc_sock *smc = + smc_clcsock_user_data(clcsk); + + if (!smc) + return; + smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_data_ready); +} + +static void smc_fback_write_space(struct sock *clcsk) +{ + struct smc_sock *smc = + smc_clcsock_user_data(clcsk); + + if (!smc) + return; + smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_write_space); +} + +static void smc_fback_error_report(struct sock *clcsk) +{ + struct smc_sock *smc = + smc_clcsock_user_data(clcsk); + + if (!smc) + return; + smc_fback_forward_wakeup(smc, clcsk, smc->clcsk_error_report); +} + +static int 
smc_switch_to_fallback(struct smc_sock *smc, int reason_code) +{ + struct sock *clcsk; + + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { + mutex_unlock(&smc->clcsock_release_lock); + return -EBADF; + } + clcsk = smc->clcsock->sk; smc->use_fallback = true; smc->fallback_rsn = reason_code; @@ -582,22 +685,40 @@ static void smc_switch_to_fallback(struct smc_sock *smc, int reason_code) smc->clcsock->wq.fasync_list = smc->sk.sk_socket->wq.fasync_list; - /* There may be some entries remaining in - * smc socket->wq, which should be removed - * to clcsocket->wq during the fallback. + /* There might be some wait entries remaining + * in smc sk->sk_wq and they should be woken up + * as clcsock's wait queue is woken up. */ - spin_lock_irqsave(&smc_wait->lock, flags); - spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING); - list_splice_init(&smc_wait->head, &clc_wait->head); - spin_unlock(&clc_wait->lock); - spin_unlock_irqrestore(&smc_wait->lock, flags); + smc->clcsk_state_change = clcsk->sk_state_change; + smc->clcsk_data_ready = clcsk->sk_data_ready; + smc->clcsk_write_space = clcsk->sk_write_space; + smc->clcsk_error_report = clcsk->sk_error_report; + + clcsk->sk_state_change = smc_fback_state_change; + clcsk->sk_data_ready = smc_fback_data_ready; + clcsk->sk_write_space = smc_fback_write_space; + clcsk->sk_error_report = smc_fback_error_report; + + smc->clcsock->sk->sk_user_data = + (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); } + mutex_unlock(&smc->clcsock_release_lock); + return 0; } /* fall back during connect */ static int smc_connect_fallback(struct smc_sock *smc, int reason_code) { - smc_switch_to_fallback(smc, reason_code); + struct net *net = sock_net(&smc->sk); + int rc = 0; + + rc = smc_switch_to_fallback(smc, reason_code); + if (rc) { /* fallback fails */ + this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt); + if (smc->sk.sk_state == SMC_INIT) + sock_put(&smc->sk); /* passive closing */ + return rc; + } smc_copy_sock_settings_to_clc(smc); smc->connect_nonblock = 0; if (smc->sk.sk_state == SMC_INIT) @@ -1518,11 +1639,12 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code, { /* RDMA setup failed, switch back to TCP */ smc_conn_abort(new_smc, local_first); - if (reason_code < 0) { /* error, no fallback possible */ + if (reason_code < 0 || + smc_switch_to_fallback(new_smc, reason_code)) { + /* error, no fallback possible */ smc_listen_out_err(new_smc); return; } - smc_switch_to_fallback(new_smc, reason_code); if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) { if (smc_clc_send_decline(new_smc, reason_code, version) < 0) { smc_listen_out_err(new_smc); @@ -1964,8 +2086,11 @@ static void smc_listen_work(struct work_struct *work) /* check if peer is smc capable */ if (!tcp_sk(newclcsock->sk)->syn_smc) { - smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC); - smc_listen_out_connected(new_smc); + rc = smc_switch_to_fallback(new_smc, SMC_CLC_DECL_PEERNOSMC); + if (rc) + smc_listen_out_err(new_smc); + else + smc_listen_out_connected(new_smc); return; } @@ -2094,10 +2219,9 @@ out: static void smc_clcsock_data_ready(struct sock *listen_clcsock) { - struct smc_sock *lsmc; + struct smc_sock *lsmc = + smc_clcsock_user_data(listen_clcsock); - lsmc = (struct smc_sock *) - ((uintptr_t)listen_clcsock->sk_user_data & ~SK_USER_DATA_NOCOPY); if (!lsmc) return; lsmc->clcsk_data_ready(listen_clcsock); @@ -2254,7 +2378,9 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_FASTOPEN) { if 
(sk->sk_state == SMC_INIT && !smc->connect_nonblock) { - smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); + rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); + if (rc) + goto out; } else { rc = -EINVAL; goto out; @@ -2447,6 +2573,11 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, /* generic setsockopts reaching us here always apply to the * CLC socket */ + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { + mutex_unlock(&smc->clcsock_release_lock); + return -EBADF; + } if (unlikely(!smc->clcsock->ops->setsockopt)) rc = -EOPNOTSUPP; else @@ -2456,6 +2587,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, sk->sk_err = smc->clcsock->sk->sk_err; sk_error_report(sk); } + mutex_unlock(&smc->clcsock_release_lock); if (optlen < sizeof(int)) return -EINVAL; @@ -2472,7 +2604,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname, case TCP_FASTOPEN_NO_COOKIE: /* option not supported by SMC */ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { - smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); + rc = smc_switch_to_fallback(smc, SMC_CLC_DECL_OPTUNSUPP); } else { rc = -EINVAL; } @@ -2515,13 +2647,23 @@ static int smc_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct smc_sock *smc; + int rc; smc = smc_sk(sock->sk); + mutex_lock(&smc->clcsock_release_lock); + if (!smc->clcsock) { + mutex_unlock(&smc->clcsock_release_lock); + return -EBADF; + } /* socket options apply to the CLC socket */ - if (unlikely(!smc->clcsock->ops->getsockopt)) + if (unlikely(!smc->clcsock->ops->getsockopt)) { + mutex_unlock(&smc->clcsock_release_lock); return -EOPNOTSUPP; - return smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, - optval, optlen); + } + rc = smc->clcsock->ops->getsockopt(smc->clcsock, level, optname, + optval, optlen); + mutex_unlock(&smc->clcsock_release_lock); + return rc; } static int smc_ioctl(struct socket *sock, unsigned int cmd, diff --git a/net/smc/smc.h b/net/smc/smc.h index 3d0b8e300deb32ce88837c9c35e03ace4bb61318..37b2001a02557e7f8b034c8a53d9fe3a3f4d0956 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -139,6 +139,12 @@ enum smc_urg_state { SMC_URG_READ = 3, /* data was already read */ }; +struct smc_mark_woken { + bool woken; + void *key; + wait_queue_entry_t wait_entry; +}; + struct smc_connection { struct rb_node alert_node; struct smc_link_group *lgr; /* link group of connection */ @@ -228,8 +234,14 @@ struct smc_connection { struct smc_sock { /* smc sock container */ struct sock sk; struct socket *clcsock; /* internal tcp socket */ + void (*clcsk_state_change)(struct sock *sk); + /* original stat_change fct. */ void (*clcsk_data_ready)(struct sock *sk); - /* original data_ready fct. **/ + /* original data_ready fct. */ + void (*clcsk_write_space)(struct sock *sk); + /* original write_space fct. */ + void (*clcsk_error_report)(struct sock *sk); + /* original error_report fct. 
*/ struct smc_connection conn; /* smc connection */ struct smc_sock *listen_smc; /* listen parent */ struct work_struct connect_work; /* handle non-blocking connect*/ @@ -264,6 +276,12 @@ static inline struct smc_sock *smc_sk(const struct sock *sk) return (struct smc_sock *)sk; } +static inline struct smc_sock *smc_clcsock_user_data(struct sock *clcsk) +{ + return (struct smc_sock *) + ((uintptr_t)clcsk->sk_user_data & ~SK_USER_DATA_NOCOPY); +} + extern struct workqueue_struct *smc_hs_wq; /* wq for handshake work */ extern struct workqueue_struct *smc_close_wq; /* wq for close work */ diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c index b8898c787d233cc0e18af0caeade68dfb89fefab..1fca2f90a9c791fc6e72f0432bae481d67120648 100644 --- a/net/smc/smc_diag.c +++ b/net/smc/smc_diag.c @@ -146,13 +146,11 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb, (req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && !list_empty(&smc->conn.lgr->list)) { struct smc_link *link = smc->conn.lnk; - struct net *net = read_pnet(&link->smcibdev->ibdev->coredev.rdma_net); struct smc_diag_lgrinfo linfo = { .role = smc->conn.lgr->role, .lnk[0].ibport = link->ibport, .lnk[0].link_id = link->link_id, - .lnk[0].net_cookie = net->net_cookie, }; memcpy(linfo.lnk[0].ibname, diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 291f1484a1b74c0a793ab3c4f3ef90804d1f9932..0599246c037690b4b01813956e4af74519277bea 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -368,9 +368,6 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, new_pe->type = SMC_PNET_ETH; memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN); strncpy(new_pe->eth_name, eth_name, IFNAMSIZ); - new_pe->ndev = ndev; - if (ndev) - netdev_tracker_alloc(ndev, &new_pe->dev_tracker, GFP_KERNEL); rc = -EEXIST; new_netdev = true; write_lock(&pnettable->lock); @@ -382,6 +379,11 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, } } if (new_netdev) { + if (ndev) { + new_pe->ndev = ndev; + netdev_tracker_alloc(ndev, &new_pe->dev_tracker, + GFP_ATOMIC); + } list_add_tail(&new_pe->list, &pnettable->pnetlist); write_unlock(&pnettable->lock); } else { diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c index fe97f31065367e633659fee5fc57dc2a15948e07..4a4082bb22ada4ee8f3f9d3d76eb0461afa5874c 100644 --- a/net/sunrpc/auth_gss/gss_generic_token.c +++ b/net/sunrpc/auth_gss/gss_generic_token.c @@ -222,10 +222,8 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size, if (ret) return ret; - if (!ret) { - *buf_in = buf; - *body_size = toksize; - } + *buf_in = buf; + *body_size = toksize; return ret; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index b87565b64928d4d6bbb253890109a745c2bd92ee..c2ba9d4cd2c777a212c45a0c51fd541214597a06 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -1433,7 +1433,7 @@ static bool use_gss_proxy(struct net *net) static ssize_t write_gssp(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - struct net *net = PDE_DATA(file_inode(file)); + struct net *net = pde_data(file_inode(file)); char tbuf[20]; unsigned long i; int res; @@ -1461,7 +1461,7 @@ static ssize_t write_gssp(struct file *file, const char __user *buf, static ssize_t read_gssp(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - struct net *net = PDE_DATA(file_inode(file)); + struct net *net = pde_data(file_inode(file)); struct sunrpc_net *sn = 
net_generic(net, sunrpc_net_id); unsigned long p = *ppos; char tbuf[10]; diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 59641803472c69e63aae99349ccb6b44712d00c7..bb1177395b99db6bc02dfec6ffd5e5430299d681 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1536,7 +1536,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf, static ssize_t cache_read_procfs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE_DATA(file_inode(filp)); + struct cache_detail *cd = pde_data(file_inode(filp)); return cache_read(filp, buf, count, ppos, cd); } @@ -1544,14 +1544,14 @@ static ssize_t cache_read_procfs(struct file *filp, char __user *buf, static ssize_t cache_write_procfs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE_DATA(file_inode(filp)); + struct cache_detail *cd = pde_data(file_inode(filp)); return cache_write(filp, buf, count, ppos, cd); } static __poll_t cache_poll_procfs(struct file *filp, poll_table *wait) { - struct cache_detail *cd = PDE_DATA(file_inode(filp)); + struct cache_detail *cd = pde_data(file_inode(filp)); return cache_poll(filp, wait, cd); } @@ -1560,21 +1560,21 @@ static long cache_ioctl_procfs(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); - struct cache_detail *cd = PDE_DATA(inode); + struct cache_detail *cd = pde_data(inode); return cache_ioctl(inode, filp, cmd, arg, cd); } static int cache_open_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE_DATA(inode); + struct cache_detail *cd = pde_data(inode); return cache_open(inode, filp, cd); } static int cache_release_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE_DATA(inode); + struct cache_detail *cd = pde_data(inode); return cache_release(inode, filp, cd); } @@ -1591,14 +1591,14 @@ static const struct proc_ops cache_channel_proc_ops = { static int content_open_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE_DATA(inode); + struct cache_detail *cd = pde_data(inode); return content_open(inode, filp, cd); } static int content_release_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE_DATA(inode); + struct cache_detail *cd = pde_data(inode); return content_release(inode, filp, cd); } @@ -1612,14 +1612,14 @@ static const struct proc_ops content_proc_ops = { static int open_flush_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE_DATA(inode); + struct cache_detail *cd = pde_data(inode); return open_flush(inode, filp, cd); } static int release_flush_procfs(struct inode *inode, struct file *filp) { - struct cache_detail *cd = PDE_DATA(inode); + struct cache_detail *cd = pde_data(inode); return release_flush(inode, filp, cd); } @@ -1627,7 +1627,7 @@ static int release_flush_procfs(struct inode *inode, struct file *filp) static ssize_t read_flush_procfs(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE_DATA(file_inode(filp)); + struct cache_detail *cd = pde_data(file_inode(filp)); return read_flush(filp, buf, count, ppos, cd); } @@ -1636,7 +1636,7 @@ static ssize_t write_flush_procfs(struct file *filp, const char __user *buf, size_t count, loff_t *ppos) { - struct cache_detail *cd = PDE_DATA(file_inode(filp)); + struct cache_detail *cd = pde_data(file_inode(filp)); return write_flush(filp, buf, count, ppos, cd); } diff --git a/net/sunrpc/clnt.c 
b/net/sunrpc/clnt.c index a312ea2bc4405917e6e9ec6b41974a32e931f729..c83fe618767c4d486ee9a467d901f44e48042685 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2900,7 +2900,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, unsigned long connect_timeout; unsigned long reconnect_timeout; unsigned char resvport, reuseport; - int ret = 0; + int ret = 0, ident; rcu_read_lock(); xps = xprt_switch_get(rcu_dereference(clnt->cl_xpi.xpi_xpswitch)); @@ -2914,8 +2914,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, reuseport = xprt->reuseport; connect_timeout = xprt->connect_timeout; reconnect_timeout = xprt->max_reconnect_timeout; + ident = xprt->xprt_class->ident; rcu_read_unlock(); + if (!xprtargs->ident) + xprtargs->ident = ident; xprt = xprt_create_transport(xprtargs); if (IS_ERR(xprt)) { ret = PTR_ERR(xprt); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index ee5336d73fddc5965753afef1f41ac6a153b30cb..35588f0afa864e961cf8d3323380395903c23184 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -600,9 +600,9 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) dget(dentry); ret = simple_rmdir(dir, dentry); + d_drop(dentry); if (!ret) fsnotify_rmdir(dir, dentry); - d_delete(dentry); dput(dentry); return ret; } @@ -613,9 +613,9 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry) dget(dentry); ret = simple_unlink(dir, dentry); + d_drop(dentry); if (!ret) fsnotify_unlink(dir, dentry); - d_delete(dentry); dput(dentry); return ret; } diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index c964b48eaabae495f6896a8658312bfc18f48769..52908f9e6eab54572f6852d725daef152163a77d 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -66,7 +66,7 @@ static int rpc_proc_show(struct seq_file *seq, void *v) { static int rpc_proc_open(struct inode *inode, struct file *file) { - return single_open(file, rpc_proc_show, PDE_DATA(inode)); + return single_open(file, rpc_proc_show, pde_data(inode)); } static const struct proc_ops rpc_proc_ops = { diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 2766dd21935b88cd1cdc0f8361d1b288e38d6047..05c758da6a92a44e064431c927b97a05504794e6 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -115,11 +115,14 @@ static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, } sock = container_of(xprt, struct sock_xprt, xprt); - if (kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0) + mutex_lock(&sock->recv_mutex); + if (sock->sock == NULL || + kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0) goto out; ret = sprintf(buf, "%pISc\n", &saddr); out: + mutex_unlock(&sock->recv_mutex); xprt_put(xprt); return ret + 1; } @@ -295,8 +298,10 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, online = 1; else if (!strncmp(buf, "remove", 6)) remove = 1; - else - return -EINVAL; + else { + count = -EINVAL; + goto out_put; + } if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) { count = -EINTR; @@ -307,25 +312,28 @@ static ssize_t rpc_sysfs_xprt_state_change(struct kobject *kobj, goto release_tasks; } if (offline) { - set_bit(XPRT_OFFLINE, &xprt->state); - spin_lock(&xps->xps_lock); - xps->xps_nactive--; - spin_unlock(&xps->xps_lock); + if (!test_and_set_bit(XPRT_OFFLINE, &xprt->state)) { + spin_lock(&xps->xps_lock); + xps->xps_nactive--; + spin_unlock(&xps->xps_lock); + } } else if (online) { - clear_bit(XPRT_OFFLINE, &xprt->state); - spin_lock(&xps->xps_lock); - xps->xps_nactive++; - spin_unlock(&xps->xps_lock); + if (test_and_clear_bit(XPRT_OFFLINE, 
&xprt->state)) { + spin_lock(&xps->xps_lock); + xps->xps_nactive++; + spin_unlock(&xps->xps_lock); + } } else if (remove) { if (test_bit(XPRT_OFFLINE, &xprt->state)) { - set_bit(XPRT_REMOVE, &xprt->state); - xprt_force_disconnect(xprt); - if (test_bit(XPRT_CONNECTED, &xprt->state)) { - if (!xprt->sending.qlen && - !xprt->pending.qlen && - !xprt->backlog.qlen && - !atomic_long_read(&xprt->queuelen)) - rpc_xprt_switch_remove_xprt(xps, xprt); + if (!test_and_set_bit(XPRT_REMOVE, &xprt->state)) { + xprt_force_disconnect(xprt); + if (test_bit(XPRT_CONNECTED, &xprt->state)) { + if (!xprt->sending.qlen && + !xprt->pending.qlen && + !xprt->backlog.qlen && + !atomic_long_read(&xprt->queuelen)) + rpc_xprt_switch_remove_xprt(xps, xprt); + } } } else { count = -EINVAL; @@ -422,6 +430,7 @@ static struct attribute *rpc_sysfs_xprt_attrs[] = { &rpc_sysfs_xprt_change_state.attr, NULL, }; +ATTRIBUTE_GROUPS(rpc_sysfs_xprt); static struct kobj_attribute rpc_sysfs_xprt_switch_info = __ATTR(xprt_switch_info, 0444, rpc_sysfs_xprt_switch_info_show, NULL); @@ -430,6 +439,7 @@ static struct attribute *rpc_sysfs_xprt_switch_attrs[] = { &rpc_sysfs_xprt_switch_info.attr, NULL, }; +ATTRIBUTE_GROUPS(rpc_sysfs_xprt_switch); static struct kobj_type rpc_sysfs_client_type = { .release = rpc_sysfs_client_release, @@ -439,14 +449,14 @@ static struct kobj_type rpc_sysfs_client_type = { static struct kobj_type rpc_sysfs_xprt_switch_type = { .release = rpc_sysfs_xprt_switch_release, - .default_attrs = rpc_sysfs_xprt_switch_attrs, + .default_groups = rpc_sysfs_xprt_switch_groups, .sysfs_ops = &kobj_sysfs_ops, .namespace = rpc_sysfs_xprt_switch_namespace, }; static struct kobj_type rpc_sysfs_xprt_type = { .release = rpc_sysfs_xprt_release, - .default_attrs = rpc_sysfs_xprt_attrs, + .default_groups = rpc_sysfs_xprt_groups, .sysfs_ops = &kobj_sysfs_ops, .namespace = rpc_sysfs_xprt_namespace, }; diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 17f174d6ea3b9b29c26ea93987d8c593d132c4ef..faba7136dd9a3b6443724972ed7582580043130e 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -13,10 +13,6 @@ #include "xprt_rdma.h" #include -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - #undef RPCRDMA_BACKCHANNEL_DEBUG /** diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index ff699307e8200e25d94cb95b2d83492739b2bce4..515dd7a66a04a0f37d3eb9034045d773689d8c5c 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -45,10 +45,6 @@ #include "xprt_rdma.h" #include -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - static void frwr_cid_init(struct rpcrdma_ep *ep, struct rpcrdma_mr *mr) { diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 8035a983c8cec0306b0c0a7c23bcf7f85af7c621..281ddb87ac8d5f9fa0ef7c7ec782e63f70d97873 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -54,10 +54,6 @@ #include "xprt_rdma.h" #include -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - /* Returns size of largest RPC-over-RDMA header in a Call message * * The largest Call header contains a full-size Read list and a diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 16e5696314a4f31cba124612f800d7e5b1daf00c..42e375dbdadb4c1fb6d14fe31f1cb07f83d3e9db 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -60,10 
+60,6 @@ #include "xprt_rdma.h" #include -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - /* * tunables */ diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3d3673ba9e1e507097090faa20bf9c7506dee7e0..7b5fce2faa10b40e6726aa62d0e06a85203113aa 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -63,17 +63,6 @@ #include "xprt_rdma.h" #include -/* - * Globals/Macros - */ - -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - -/* - * internal functions - */ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt); static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, @@ -274,8 +263,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) ep->re_connect_status = -ENETUNREACH; goto wake_connect_worker; case RDMA_CM_EVENT_REJECTED: - dprintk("rpcrdma: connection to %pISpc rejected: %s\n", - sap, rdma_reject_msg(id, event->status)); ep->re_connect_status = -ECONNREFUSED; if (event->status == IB_CM_REJ_STALE_CONN) ep->re_connect_status = -ENOTCONN; @@ -291,8 +278,6 @@ disconnected: break; } - dprintk("RPC: %s: %pISpc on %s/frwr: %s\n", __func__, sap, - ep->re_id->device->name, rdma_event_msg(event->event)); return 0; } @@ -419,14 +404,6 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) ep->re_attr.qp_type = IB_QPT_RC; ep->re_attr.port_num = ~0; - dprintk("RPC: %s: requested max: dtos: send %d recv %d; " - "iovs: send %d recv %d\n", - __func__, - ep->re_attr.cap.max_send_wr, - ep->re_attr.cap.max_recv_wr, - ep->re_attr.cap.max_send_sge, - ep->re_attr.cap.max_recv_sge); - ep->re_send_batch = ep->re_max_requests >> 3; ep->re_send_count = ep->re_send_batch; init_waitqueue_head(&ep->re_connect_wait); @@ -436,6 +413,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) IB_POLL_WORKQUEUE); if (IS_ERR(ep->re_attr.send_cq)) { rc = PTR_ERR(ep->re_attr.send_cq); + ep->re_attr.send_cq = NULL; goto out_destroy; } @@ -444,6 +422,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) IB_POLL_WORKQUEUE); if (IS_ERR(ep->re_attr.recv_cq)) { rc = PTR_ERR(ep->re_attr.recv_cq); + ep->re_attr.recv_cq = NULL; goto out_destroy; } ep->re_receive_count = 0; @@ -482,6 +461,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) ep->re_pd = ib_alloc_pd(device, 0); if (IS_ERR(ep->re_pd)) { rc = PTR_ERR(ep->re_pd); + ep->re_pd = NULL; goto out_destroy; } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index d8ee06a9650a1697abc4f23b01f54373f49b2a32..0f39e08ee580e8f383b8a5fa6d7bd9ccc2cb0380 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1641,7 +1641,12 @@ static int xs_get_srcport(struct sock_xprt *transport) unsigned short get_srcport(struct rpc_xprt *xprt) { struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); - return xs_sock_getport(sock->sock); + unsigned short ret = 0; + mutex_lock(&sock->recv_mutex); + if (sock->sock) + ret = xs_sock_getport(sock->sock); + mutex_unlock(&sock->recv_mutex); + return ret; } EXPORT_SYMBOL(get_srcport); @@ -1910,7 +1915,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); int ret; - if (RPC_IS_ASYNC(task)) { + if (RPC_IS_ASYNC(task)) { /* * We want the AF_LOCAL connect to be resolved in the * filesystem namespace of the process making the rpc diff --git a/net/tipc/link.c 
b/net/tipc/link.c index 8d9e09f48f4ca14696157cc0de918d0e40051204..1e14d7f8f28f1dc19d8b96b2b139cc8b6884048b 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2200,7 +2200,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct tipc_msg *hdr = buf_msg(skb); struct tipc_gap_ack_blks *ga = NULL; bool reply = msg_probe(hdr), retransmitted = false; - u16 dlen = msg_data_sz(hdr), glen = 0; + u32 dlen = msg_data_sz(hdr), glen = 0; u16 peers_snd_nxt = msg_next_sent(hdr); u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); @@ -2214,6 +2214,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, void *data; trace_tipc_proto_rcv(skb, false, l->name); + + if (dlen > U16_MAX) + goto exit; + if (tipc_link_is_blocked(l) || !xmitq) goto exit; @@ -2309,7 +2313,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, /* Receive Gap ACK blocks from peer if any */ glen = tipc_get_gap_ack_blks(&ga, l, hdr, true); - + if (glen > dlen) + break; tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr, &l->mon_state, l->bearer_id); diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 407619697292f3f781c9dd6cb62aceaa762c7e29..2f4d23238a7e33a6ff22e87fca6ee0281040b338 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -496,6 +496,8 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, state->probing = false; /* Sanity check received domain record */ + if (new_member_cnt > MAX_MON_DOMAIN) + return; if (dlen < dom_rec_len(arrv_dom, 0)) return; if (dlen != dom_rec_len(arrv_dom, new_member_cnt)) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index bda902caa81475bcd758cfa36cb259986fca9f1c..8267b751a526a4dc30062e5e9b38da92bc97266e 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -313,7 +313,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, pr_warn_ratelimited("Failed to remove binding %u,%u from %u\n", ua.sr.type, ua.sr.lower, node); } else { - pr_warn("Unrecognized name table message received\n"); + pr_warn_ratelimited("Unknown name table message received\n"); } return false; } diff --git a/samples/seccomp/dropper.c b/samples/seccomp/dropper.c index cc0648eb389e6168800d9542bf24467d7e901a92..4bca4b70f66552d989e14717ca96ac6ac9beeb83 100644 --- a/samples/seccomp/dropper.c +++ b/samples/seccomp/dropper.c @@ -25,7 +25,7 @@ #include #include -static int install_filter(int nr, int arch, int error) +static int install_filter(int arch, int nr, int error) { struct sock_filter filter[] = { BPF_STMT(BPF_LD+BPF_W+BPF_ABS, @@ -42,6 +42,10 @@ static int install_filter(int arch, int nr, int error) .len = (unsigned short)(sizeof(filter)/sizeof(filter[0])), .filter = filter, }; + if (error == -1) { + struct sock_filter kill = BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL); + filter[4] = kill; + } if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { perror("prctl(NO_NEW_PRIVS)"); return 1; @@ -57,9 +61,10 @@ int main(int argc, char **argv) { if (argc < 5) { fprintf(stderr, "Usage:\n" - "dropper []\n" + "dropper []\n" "Hint: AUDIT_ARCH_I386: 0x%X\n" " AUDIT_ARCH_X86_64: 0x%X\n" + " errno == -1 means SECCOMP_RET_KILL\n" "\n", AUDIT_ARCH_I386, AUDIT_ARCH_X86_64); return 1; } diff --git a/scripts/Makefile b/scripts/Makefile index ecd3acacd0ec5209569400851e8832e8c295d809..ce5aa9030b740a65c731c8013973e24f76f2687b 100644 --- a/scripts/Makefile +++ b/scripts/Makefile @@ -25,7 +25,7 @@ HOSTCFLAGS_sorttable.o += -I$(srctree)/tools/arch/x86/include HOSTCFLAGS_sorttable.o +=
-DUNWINDER_ORC_ENABLED endif -ifdef CONFIG_DYNAMIC_FTRACE +ifdef CONFIG_BUILDTIME_MCOUNT_SORT HOSTCFLAGS_sorttable.o += -DMCOUNT_SORT_ENABLED endif diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index d538255038747db9aba7a3a0fc80d9a68c303af6..8be892887d71687085b94d4378c1b5dfa34df4b6 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -51,6 +51,7 @@ KBUILD_CFLAGS += -Wno-sign-compare KBUILD_CFLAGS += -Wno-format-zero-length KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare +KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access) endif endif diff --git a/scripts/dtc/dtx_diff b/scripts/dtc/dtx_diff index d3422ee15e300bc76af3b15adc682aacf2d4e1ee..f2bbde4bba86bc70b27bb5a992a262c1997db811 100755 --- a/scripts/dtc/dtx_diff +++ b/scripts/dtc/dtx_diff @@ -59,12 +59,8 @@ Otherwise DTx is treated as a dts source file (aka .dts). or '/include/' to be processed. If DTx_1 and DTx_2 are in different architectures, then this script - may not work since \${ARCH} is part of the include path. Two possible - workarounds: - - `basename $0` \\ - <(ARCH=arch_of_dtx_1 `basename $0` DTx_1) \\ - <(ARCH=arch_of_dtx_2 `basename $0` DTx_2) + may not work since \${ARCH} is part of the include path. The following + workaround can be used: `basename $0` ARCH=arch_of_dtx_1 DTx_1 >tmp_dtx_1.dts `basename $0` ARCH=arch_of_dtx_2 DTx_2 >tmp_dtx_2.dts diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index 59717be312109e8865c0b4ce9a2730c4367d5934..d3c3a61308adaecbfa77e8b79d71878adefd6fa2 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -979,10 +979,10 @@ static int conf_write_autoconf_cmd(const char *autoconf_name) fprintf(out, "\n$(deps_config): ;\n"); - if (ferror(out)) /* error check for all fprintf() calls */ - return -1; - + ret = ferror(out); /* error check for all fprintf() calls */ fclose(out); + if (ret) + return -1; if (rename(tmp, name)) { perror("rename"); @@ -994,14 +994,19 @@ static int conf_write_autoconf_cmd(const char *autoconf_name) static int conf_touch_deps(void) { - const char *name; + const char *name, *tmp; struct symbol *sym; int res, i; - strcpy(depfile_path, "include/config/"); - depfile_prefix_len = strlen(depfile_path); - name = conf_get_autoconfig_name(); + tmp = strrchr(name, '/'); + depfile_prefix_len = tmp ? 
tmp - name + 1 : 0; + if (depfile_prefix_len + 1 > sizeof(depfile_path)) + return -1; + + strncpy(depfile_path, name, depfile_prefix_len); + depfile_path[depfile_prefix_len] = 0; + conf_read_simple(name, S_DEF_AUTO); sym_calc_value(modules_sym); @@ -1093,10 +1098,10 @@ static int __conf_write_autoconf(const char *filename, print_symbol(file, sym); /* check possible errors in conf_write_heading() and print_symbol() */ - if (ferror(file)) - return -1; - + ret = ferror(file); fclose(file); + if (ret) + return -1; if (rename(tmp, filename)) { perror("rename"); diff --git a/scripts/kconfig/preprocess.c b/scripts/kconfig/preprocess.c index 0590f86df6e40cfd073100904f3ec5cfbe650f5f..748da578b418c4acb53f454e17e2370d2050ceb6 100644 --- a/scripts/kconfig/preprocess.c +++ b/scripts/kconfig/preprocess.c @@ -141,7 +141,7 @@ static char *do_lineno(int argc, char *argv[]) static char *do_shell(int argc, char *argv[]) { FILE *p; - char buf[256]; + char buf[4096]; char *cmd; size_t nread; int i; diff --git a/security/integrity/digsig_asymmetric.c b/security/integrity/digsig_asymmetric.c index 23240d793b074e645047e56169ed7d85f93c12ec..895f4b9ce8c6b2ea2785a2c8298ec11528013185 100644 --- a/security/integrity/digsig_asymmetric.c +++ b/security/integrity/digsig_asymmetric.c @@ -109,22 +109,25 @@ int asymmetric_verify(struct key *keyring, const char *sig, pk = asymmetric_key_public_key(key); pks.pkey_algo = pk->pkey_algo; - if (!strcmp(pk->pkey_algo, "rsa")) + if (!strcmp(pk->pkey_algo, "rsa")) { pks.encoding = "pkcs1"; - else if (!strncmp(pk->pkey_algo, "ecdsa-", 6)) + } else if (!strncmp(pk->pkey_algo, "ecdsa-", 6)) { /* edcsa-nist-p192 etc. */ pks.encoding = "x962"; - else if (!strcmp(pk->pkey_algo, "ecrdsa") || - !strcmp(pk->pkey_algo, "sm2")) + } else if (!strcmp(pk->pkey_algo, "ecrdsa") || + !strcmp(pk->pkey_algo, "sm2")) { pks.encoding = "raw"; - else - return -ENOPKG; + } else { + ret = -ENOPKG; + goto out; + } pks.digest = (u8 *)data; pks.digest_size = datalen; pks.s = hdr->sig; pks.s_size = siglen; ret = verify_signature(key, &pks); +out: key_put(key); pr_debug("%s() = %d\n", __func__, ret); return ret; diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index 3d8e9d5db5aa5125b1136974f47f3a9062d3ddb9..3ad8f7734208b93a8ecf02a7af5b3df248d5decb 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -496,12 +496,12 @@ int __init ima_fs_init(void) return 0; out: + securityfs_remove(ima_policy); securityfs_remove(violations); securityfs_remove(runtime_measurements_count); securityfs_remove(ascii_runtime_measurements); securityfs_remove(binary_runtime_measurements); securityfs_remove(ima_symlink); securityfs_remove(ima_dir); - securityfs_remove(ima_policy); return -1; } diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 320ca80aacab577c31e8bad997695939bde13c23..2a1f6418b10a62f93b8100434eafd100419e2588 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -1967,6 +1967,14 @@ int ima_policy_show(struct seq_file *m, void *v) rcu_read_lock(); + /* Do not print rules with inactive LSM labels */ + for (i = 0; i < MAX_LSM_RULES; i++) { + if (entry->lsm[i].args_p && !entry->lsm[i].rule) { + rcu_read_unlock(); + return 0; + } + } + if (entry->action & MEASURE) seq_puts(m, pt(Opt_measure)); if (entry->action & DONT_MEASURE) diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c index 
694560396be05719bce56b8c8506812c9d861ee3..db1ad6d7a57fbf7d5866ccf4d6cafb397c6f4f3f 100644 --- a/security/integrity/ima/ima_template.c +++ b/security/integrity/ima/ima_template.c @@ -29,6 +29,7 @@ static struct ima_template_desc builtin_templates[] = { static LIST_HEAD(defined_templates); static DEFINE_SPINLOCK(template_list); +static int template_setup_done; static const struct ima_template_field supported_fields[] = { {.field_id = "d", .field_init = ima_eventdigest_init, @@ -101,10 +102,11 @@ static int __init ima_template_setup(char *str) struct ima_template_desc *template_desc; int template_len = strlen(str); - if (ima_template) + if (template_setup_done) return 1; - ima_init_template_list(); + if (!ima_template) + ima_init_template_list(); /* * Verify that a template with the supplied name exists. @@ -128,6 +130,7 @@ static int __init ima_template_setup(char *str) } ima_template = template_desc; + template_setup_done = 1; return 1; } __setup("ima_template=", ima_template_setup); @@ -136,7 +139,7 @@ static int __init ima_template_fmt_setup(char *str) { int num_templates = ARRAY_SIZE(builtin_templates); - if (ima_template) + if (template_setup_done) return 1; if (template_desc_init_fields(str, NULL, NULL) < 0) { @@ -147,6 +150,7 @@ static int __init ima_template_fmt_setup(char *str) builtin_templates[num_templates - 1].fmt = str; ima_template = builtin_templates + num_templates - 1; + template_setup_done = 1; return 1; } diff --git a/security/integrity/integrity_audit.c b/security/integrity/integrity_audit.c index 29220056207f4e51e83afda6965816d7032d5f6e..0ec5e4c22cb2a1066c2b897776ead6d3db72635c 100644 --- a/security/integrity/integrity_audit.c +++ b/security/integrity/integrity_audit.c @@ -45,6 +45,8 @@ void integrity_audit_message(int audit_msgno, struct inode *inode, return; ab = audit_log_start(audit_context(), GFP_KERNEL, audit_msgno); + if (!ab) + return; audit_log_format(ab, "pid=%d uid=%u auid=%u ses=%u", task_pid_nr(current), from_kuid(&init_user_ns, current_uid()), diff --git a/security/security.c b/security/security.c index 3d4eb474f35b0c7f77ffead5570d52dd9bcf3df3..22261d79f33333a85f2eb29b9ee47dc6db2df14d 100644 --- a/security/security.c +++ b/security/security.c @@ -1048,8 +1048,19 @@ int security_dentry_init_security(struct dentry *dentry, int mode, const char **xattr_name, void **ctx, u32 *ctxlen) { - return call_int_hook(dentry_init_security, -EOPNOTSUPP, dentry, mode, - name, xattr_name, ctx, ctxlen); + struct security_hook_list *hp; + int rc; + + /* + * Only one module will provide a security context. 
+ */ + hlist_for_each_entry(hp, &security_hook_heads.dentry_init_security, list) { + rc = hp->hook.dentry_init_security(dentry, mode, name, + xattr_name, ctx, ctxlen); + if (rc != LSM_RET_DEFAULT(dentry_init_security)) + return rc; + } + return LSM_RET_DEFAULT(dentry_init_security); } EXPORT_SYMBOL(security_dentry_init_security); diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c index 2ec6e5cd25d9b6a057597acc767811f15a18823b..feb206f3acb4a17b7ea777d926c60552d3364b62 100644 --- a/security/selinux/ss/conditional.c +++ b/security/selinux/ss/conditional.c @@ -152,6 +152,8 @@ static void cond_list_destroy(struct policydb *p) for (i = 0; i < p->cond_list_len; i++) cond_node_destroy(&p->cond_list[i]); kfree(p->cond_list); + p->cond_list = NULL; + p->cond_list_len = 0; } void cond_policydb_destroy(struct policydb *p) @@ -441,7 +443,6 @@ int cond_read_list(struct policydb *p, void *fp) return 0; err: cond_list_destroy(p); - p->cond_list = NULL; return rc; } diff --git a/sound/core/info.c b/sound/core/info.c index a451b24199c3ee66a0a28a0aedd3240d015c6b1b..782fba87cc043a156427d6c5500b13a63da9fe06 100644 --- a/sound/core/info.c +++ b/sound/core/info.c @@ -234,7 +234,7 @@ static int snd_info_entry_mmap(struct file *file, struct vm_area_struct *vma) static int snd_info_entry_open(struct inode *inode, struct file *file) { - struct snd_info_entry *entry = PDE_DATA(inode); + struct snd_info_entry *entry = pde_data(inode); struct snd_info_private_data *data; int mode, err; @@ -365,7 +365,7 @@ static int snd_info_seq_show(struct seq_file *seq, void *p) static int snd_info_text_entry_open(struct inode *inode, struct file *file) { - struct snd_info_entry *entry = PDE_DATA(inode); + struct snd_info_entry *entry = pde_data(inode); struct snd_info_private_data *data; int err; diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 621883e711949ed0067e0318d325686c808b0644..a056b3ef3c84397aae2a27348c3ca388e0e5e983 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -172,6 +172,19 @@ unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream) } EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave); +unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *substream) +{ + unsigned long flags = 0; + if (substream->pcm->nonatomic) + mutex_lock_nested(&substream->self_group.mutex, + SINGLE_DEPTH_NESTING); + else + spin_lock_irqsave_nested(&substream->self_group.lock, flags, + SINGLE_DEPTH_NESTING); + return flags; +} +EXPORT_SYMBOL_GPL(_snd_pcm_stream_lock_irqsave_nested); + /** * snd_pcm_stream_unlock_irqrestore - Unlock the PCM stream * @substream: PCM substream diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c index b7758dbe23714a98752f572899a178efd3f7e06c..5cb92f7ccbcac991f95aa1fd2ecea60486fe9248 100644 --- a/sound/hda/intel-sdw-acpi.c +++ b/sound/hda/intel-sdw-acpi.c @@ -50,11 +50,11 @@ static bool is_link_enabled(struct fwnode_handle *fw_node, int i) static int sdw_intel_scan_controller(struct sdw_intel_acpi_info *info) { - struct acpi_device *adev; + struct acpi_device *adev = acpi_fetch_acpi_dev(info->handle); int ret, i; u8 count; - if (acpi_bus_get_device(info->handle, &adev)) + if (!adev) return -EINVAL; /* Found controller, find links supported */ @@ -119,7 +119,6 @@ static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level, void *cdata, void **return_value) { struct sdw_intel_acpi_info *info = cdata; - struct acpi_device *adev; acpi_status status; u64 adr; @@ -127,7 +126,7 @@ static 
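/*
 * The conditional.c change above makes cond_list_destroy() idempotent
 * by clearing both the pointer and the element count at free time, so
 * the error path in cond_read_list() and a later
 * cond_policydb_destroy() can each call it without a double free.
 * A user-space sketch of the idiom, with illustrative types:
 */
#include <stdlib.h>

struct node { int val; };

struct policy {
	struct node *list;
	unsigned int list_len;
};

static void list_destroy(struct policy *p)
{
	/* per-element teardown would go here, bounded by list_len */
	free(p->list);
	p->list = NULL;		/* free(NULL) is a no-op on re-entry */
	p->list_len = 0;	/* and no stale elements are walked   */
}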
acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level, if (ACPI_FAILURE(status)) return AE_OK; /* keep going */ - if (acpi_bus_get_device(handle, &adev)) { + if (!acpi_fetch_acpi_dev(handle)) { pr_err("%s: Couldn't find ACPI handle\n", __func__); return AE_NOT_FOUND; } diff --git a/sound/pci/hda/hda_auto_parser.c b/sound/pci/hda/hda_auto_parser.c index 82c492b056671b1491ae533c86ffd18e8703f846..cd1db943b7e0726587784ffaa82471638eb6bf79 100644 --- a/sound/pci/hda/hda_auto_parser.c +++ b/sound/pci/hda/hda_auto_parser.c @@ -981,7 +981,7 @@ void snd_hda_pick_fixup(struct hda_codec *codec, int id = HDA_FIXUP_ID_NOT_SET; const char *name = NULL; const char *type = NULL; - int vendor, device; + unsigned int vendor, device; if (codec->fixup_id != HDA_FIXUP_ID_NOT_SET) return; diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 7016b48227bf28d5f35ed54b656d7217eae467d9..f552785d301e06103721c8816bae4858905e2573 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -3000,6 +3000,10 @@ void snd_hda_codec_shutdown(struct hda_codec *codec) { struct hda_pcm *cpcm; + /* Skip the shutdown if codec is not registered */ + if (!codec->registered) + return; + list_for_each_entry(cpcm, &codec->pcm_list_head, list) snd_pcm_suspend_all(cpcm->pcm); diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 3bf5e341070388864b8594d75a6959940190b453..fc114e5224806f44cfffd3797997c17fc0a9087b 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -91,6 +91,12 @@ static void snd_hda_gen_spec_free(struct hda_gen_spec *spec) free_kctls(spec); snd_array_free(&spec->paths); snd_array_free(&spec->loopback_list); +#ifdef CONFIG_SND_HDA_GENERIC_LEDS + if (spec->led_cdevs[LED_AUDIO_MUTE]) + led_classdev_unregister(spec->led_cdevs[LED_AUDIO_MUTE]); + if (spec->led_cdevs[LED_AUDIO_MICMUTE]) + led_classdev_unregister(spec->led_cdevs[LED_AUDIO_MICMUTE]); +#endif } /* @@ -3922,7 +3928,10 @@ static int create_mute_led_cdev(struct hda_codec *codec, enum led_brightness), bool micmute) { + struct hda_gen_spec *spec = codec->spec; struct led_classdev *cdev; + int idx = micmute ? LED_AUDIO_MICMUTE : LED_AUDIO_MUTE; + int err; cdev = devm_kzalloc(&codec->core.dev, sizeof(*cdev), GFP_KERNEL); if (!cdev) @@ -3932,10 +3941,14 @@ static int create_mute_led_cdev(struct hda_codec *codec, cdev->max_brightness = 1; cdev->default_trigger = micmute ? "audio-micmute" : "audio-mute"; cdev->brightness_set_blocking = callback; - cdev->brightness = ledtrig_audio_get(micmute ? 
LED_AUDIO_MICMUTE : LED_AUDIO_MUTE); + cdev->brightness = ledtrig_audio_get(idx); cdev->flags = LED_CORE_SUSPENDRESUME; - return devm_led_classdev_register(&codec->core.dev, cdev); + err = led_classdev_register(&codec->core.dev, cdev); + if (err < 0) + return err; + spec->led_cdevs[idx] = cdev; + return 0; } /** diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h index 8e1bc8ea74fc3269f60904224dce91eedce01958..34eba40cc6e67b2573482ccb2534d8636c245301 100644 --- a/sound/pci/hda/hda_generic.h +++ b/sound/pci/hda/hda_generic.h @@ -294,6 +294,9 @@ struct hda_gen_spec { struct hda_jack_callback *cb); void (*mic_autoswitch_hook)(struct hda_codec *codec, struct hda_jack_callback *cb); + + /* leds */ + struct led_classdev *led_cdevs[NUM_AUDIO_LEDS]; }; /* values for add_stereo_mix_input flag */ diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 668274e52674591b843df7f2256a962046ad6fde..8315bf7d4c381d2b0faad840c3a9f065b2c0285b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -98,6 +98,7 @@ struct alc_spec { unsigned int gpio_mic_led_mask; struct alc_coef_led mute_led_coef; struct alc_coef_led mic_led_coef; + struct mutex coef_mutex; hda_nid_t headset_mic_pin; hda_nid_t headphone_mic_pin; @@ -137,8 +138,8 @@ struct alc_spec { * COEF access helper functions */ -static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid, - unsigned int coef_idx) +static int __alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid, + unsigned int coef_idx) { unsigned int val; @@ -147,28 +148,61 @@ static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid, return val; } +static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid, + unsigned int coef_idx) +{ + struct alc_spec *spec = codec->spec; + unsigned int val; + + mutex_lock(&spec->coef_mutex); + val = __alc_read_coefex_idx(codec, nid, coef_idx); + mutex_unlock(&spec->coef_mutex); + return val; +} + #define alc_read_coef_idx(codec, coef_idx) \ alc_read_coefex_idx(codec, 0x20, coef_idx) -static void alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid, - unsigned int coef_idx, unsigned int coef_val) +static void __alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid, + unsigned int coef_idx, unsigned int coef_val) { snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_COEF_INDEX, coef_idx); snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PROC_COEF, coef_val); } +static void alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid, + unsigned int coef_idx, unsigned int coef_val) +{ + struct alc_spec *spec = codec->spec; + + mutex_lock(&spec->coef_mutex); + __alc_write_coefex_idx(codec, nid, coef_idx, coef_val); + mutex_unlock(&spec->coef_mutex); +} + #define alc_write_coef_idx(codec, coef_idx, coef_val) \ alc_write_coefex_idx(codec, 0x20, coef_idx, coef_val) +static void __alc_update_coefex_idx(struct hda_codec *codec, hda_nid_t nid, + unsigned int coef_idx, unsigned int mask, + unsigned int bits_set) +{ + unsigned int val = __alc_read_coefex_idx(codec, nid, coef_idx); + + if (val != -1) + __alc_write_coefex_idx(codec, nid, coef_idx, + (val & ~mask) | bits_set); +} + static void alc_update_coefex_idx(struct hda_codec *codec, hda_nid_t nid, unsigned int coef_idx, unsigned int mask, unsigned int bits_set) { - unsigned int val = alc_read_coefex_idx(codec, nid, coef_idx); + struct alc_spec *spec = codec->spec; - if (val != -1) - alc_write_coefex_idx(codec, nid, coef_idx, - (val & ~mask) | bits_set); + mutex_lock(&spec->coef_mutex); + 
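/*
 * The patch_realtek.c refactor splits every COEF helper into a bare
 * __helper plus a locked wrapper, so multi-step sequences (the
 * read-modify-write here, or a whole coef_fw table) hold coef_mutex
 * across the full index-select/data-access pair.  A reduced sketch of
 * why that matters, using plain pthreads and stand-in "registers":
 */
#include <pthread.h>

struct chip {
	pthread_mutex_t lock;
	unsigned int index_reg;		/* selects the active register */
	unsigned int data_reg;		/* reads/writes the selection  */
};

static unsigned int __read_indexed(struct chip *c, unsigned int idx)
{
	c->index_reg = idx;		/* step 1: select */
	return c->data_reg;		/* step 2: access */
}

static void __write_indexed(struct chip *c, unsigned int idx, unsigned int val)
{
	c->index_reg = idx;
	c->data_reg = val;
}

/* Unlocked, a concurrent caller could retarget index_reg between the
 * two steps and the update would hit an unrelated register. */
static void update_indexed(struct chip *c, unsigned int idx,
			   unsigned int mask, unsigned int bits)
{
	pthread_mutex_lock(&c->lock);
	__write_indexed(c, idx, (__read_indexed(c, idx) & ~mask) | bits);
	pthread_mutex_unlock(&c->lock);
}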
__alc_update_coefex_idx(codec, nid, coef_idx, mask, bits_set); + mutex_unlock(&spec->coef_mutex); } #define alc_update_coef_idx(codec, coef_idx, mask, bits_set) \ @@ -201,13 +235,17 @@ struct coef_fw { static void alc_process_coef_fw(struct hda_codec *codec, const struct coef_fw *fw) { + struct alc_spec *spec = codec->spec; + + mutex_lock(&spec->coef_mutex); for (; fw->nid; fw++) { if (fw->mask == (unsigned short)-1) - alc_write_coefex_idx(codec, fw->nid, fw->idx, fw->val); + __alc_write_coefex_idx(codec, fw->nid, fw->idx, fw->val); else - alc_update_coefex_idx(codec, fw->nid, fw->idx, - fw->mask, fw->val); + __alc_update_coefex_idx(codec, fw->nid, fw->idx, + fw->mask, fw->val); } + mutex_unlock(&spec->coef_mutex); } /* @@ -1153,6 +1191,7 @@ static int alc_alloc_spec(struct hda_codec *codec, hda_nid_t mixer_nid) codec->spdif_status_reset = 1; codec->forced_resume = 1; codec->patch_ops = alc_patch_ops; + mutex_init(&spec->coef_mutex); err = alc_codec_rename_from_preset(codec); if (err < 0) { @@ -2125,6 +2164,7 @@ static void alc1220_fixup_gb_x570(struct hda_codec *codec, { static const hda_nid_t conn1[] = { 0x0c }; static const struct coef_fw gb_x570_coefs[] = { + WRITE_COEF(0x07, 0x03c0), WRITE_COEF(0x1a, 0x01c1), WRITE_COEF(0x1b, 0x0202), WRITE_COEF(0x43, 0x3005), @@ -2551,7 +2591,8 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK(0x1458, 0xa0cd, "Gigabyte X570 Aorus Master", ALC1220_FIXUP_GB_X570), - SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_GB_X570), + SND_PCI_QUIRK(0x1458, 0xa0d5, "Gigabyte X570S Aorus Master", ALC1220_FIXUP_GB_X570), SND_PCI_QUIRK(0x1462, 0x11f7, "MSI-GE63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1229, "MSI-GP73", ALC1220_FIXUP_CLEVO_P950), @@ -2626,6 +2667,7 @@ static const struct hda_model_fixup alc882_fixup_models[] = { {.id = ALC882_FIXUP_NO_PRIMARY_HP, .name = "no-primary-hp"}, {.id = ALC887_FIXUP_ASUS_BASS, .name = "asus-bass"}, {.id = ALC1220_FIXUP_GB_DUAL_CODECS, .name = "dual-codecs"}, + {.id = ALC1220_FIXUP_GB_X570, .name = "gb-x570"}, {.id = ALC1220_FIXUP_CLEVO_P950, .name = "clevo-p950"}, {} }; @@ -8969,6 +9011,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC), diff --git a/sound/soc/amd/acp/acp-mach-common.c b/sound/soc/amd/acp/acp-mach-common.c index c9caade5cb74687a5eef18df160725b1581108f7..cd05ee2802c9e355119dad8a751d387780e0c497 100644 --- a/sound/soc/amd/acp/acp-mach-common.c +++ b/sound/soc/amd/acp/acp-mach-common.c @@ -303,11 +303,11 @@ static const struct snd_soc_dapm_route rt1019_map_lr[] = { static struct snd_soc_codec_conf rt1019_conf[] = { { - .dlc = COMP_CODEC_CONF("i2c-10EC1019:00"), + .dlc = 
COMP_CODEC_CONF("i2c-10EC1019:01"), .name_prefix = "Left", }, { - .dlc = COMP_CODEC_CONF("i2c-10EC1019:01"), + .dlc = COMP_CODEC_CONF("i2c-10EC1019:00"), .name_prefix = "Right", }, }; diff --git a/sound/soc/codecs/cpcap.c b/sound/soc/codecs/cpcap.c index 598e09024e2390ce9e5d0958802a30739c2a66bd..ffdf8b615efaa0d631be0997726d5d9344038155 100644 --- a/sound/soc/codecs/cpcap.c +++ b/sound/soc/codecs/cpcap.c @@ -1667,6 +1667,8 @@ static int cpcap_codec_probe(struct platform_device *pdev) { struct device_node *codec_node = of_get_child_by_name(pdev->dev.parent->of_node, "audio-codec"); + if (!codec_node) + return -ENODEV; pdev->dev.of_node = codec_node; diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c index b61f980cabdc0233e8b6265aecd683ad99420574..b07607a9ecea4e43c359da1ca124cce1f436dfd9 100644 --- a/sound/soc/codecs/hdmi-codec.c +++ b/sound/soc/codecs/hdmi-codec.c @@ -277,7 +277,7 @@ struct hdmi_codec_priv { bool busy; struct snd_soc_jack *jack; unsigned int jack_status; - u8 iec_status[5]; + u8 iec_status[AES_IEC958_STATUS_SIZE]; }; static const struct snd_soc_dapm_widget hdmi_widgets[] = { diff --git a/sound/soc/codecs/lpass-rx-macro.c b/sound/soc/codecs/lpass-rx-macro.c index aec5127260fd4e00f0dd4167ab8e940fad7e9db2..6ffe88345de5ffdc6abb6fb030ee82267f82966a 100644 --- a/sound/soc/codecs/lpass-rx-macro.c +++ b/sound/soc/codecs/lpass-rx-macro.c @@ -2688,8 +2688,8 @@ static uint32_t get_iir_band_coeff(struct snd_soc_component *component, int reg, b2_reg; /* Address does not automatically update if reading */ - reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B1_CTL + 16 * iir_idx; - b2_reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B2_CTL + 16 * iir_idx; + reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B1_CTL + 0x80 * iir_idx; + b2_reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B2_CTL + 0x80 * iir_idx; snd_soc_component_write(component, reg, ((band_idx * BAND_MAX + coeff_idx) * @@ -2718,7 +2718,7 @@ static uint32_t get_iir_band_coeff(struct snd_soc_component *component, static void set_iir_band_coeff(struct snd_soc_component *component, int iir_idx, int band_idx, uint32_t value) { - int reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B2_CTL + 16 * iir_idx; + int reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B2_CTL + 0x80 * iir_idx; snd_soc_component_write(component, reg, (value & 0xFF)); snd_soc_component_write(component, reg, (value >> 8) & 0xFF); @@ -2739,7 +2739,7 @@ static int rx_macro_put_iir_band_audio_mixer( int iir_idx = ctl->iir_idx; int band_idx = ctl->band_idx; u32 coeff[BAND_MAX]; - int reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B1_CTL + 16 * iir_idx; + int reg = CDC_RX_SIDETONE_IIR0_IIR_COEF_B1_CTL + 0x80 * iir_idx; memcpy(&coeff[0], ucontrol->value.bytes.data, params->max); diff --git a/sound/soc/codecs/max9759.c b/sound/soc/codecs/max9759.c index d75fd61b90321bee1029490f4f39f48f3f78fd5d..bc57d7687f16a0cee010c65cf19f25b0b3574800 100644 --- a/sound/soc/codecs/max9759.c +++ b/sound/soc/codecs/max9759.c @@ -64,7 +64,8 @@ static int speaker_gain_control_put(struct snd_kcontrol *kcontrol, struct snd_soc_component *c = snd_soc_kcontrol_component(kcontrol); struct max9759 *priv = snd_soc_component_get_drvdata(c); - if (ucontrol->value.integer.value[0] > 3) + if (ucontrol->value.integer.value[0] < 0 || + ucontrol->value.integer.value[0] > 3) return -EINVAL; priv->gain = ucontrol->value.integer.value[0]; diff --git a/sound/soc/codecs/rt5682-i2c.c b/sound/soc/codecs/rt5682-i2c.c index 20e0f90ea49863dddc81930ebe93adfcd130850d..20fc0f3766dedc4ce03b308985420831534dd557 100644 --- a/sound/soc/codecs/rt5682-i2c.c +++ 
b/sound/soc/codecs/rt5682-i2c.c @@ -59,18 +59,12 @@ static void rt5682_jd_check_handler(struct work_struct *work) struct rt5682_priv *rt5682 = container_of(work, struct rt5682_priv, jd_check_work.work); - if (snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL) - & RT5682_JDH_RS_MASK) { + if (snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL) & RT5682_JDH_RS_MASK) /* jack out */ - rt5682->jack_type = rt5682_headset_detect(rt5682->component, 0); - - snd_soc_jack_report(rt5682->hs_jack, rt5682->jack_type, - SND_JACK_HEADSET | - SND_JACK_BTN_0 | SND_JACK_BTN_1 | - SND_JACK_BTN_2 | SND_JACK_BTN_3); - } else { + mod_delayed_work(system_power_efficient_wq, + &rt5682->jack_detect_work, 0); + else schedule_delayed_work(&rt5682->jd_check_work, 500); - } } static irqreturn_t rt5682_irq(int irq, void *data) @@ -198,7 +192,6 @@ static int rt5682_i2c_probe(struct i2c_client *i2c, } mutex_init(&rt5682->calibrate_mutex); - mutex_init(&rt5682->jdet_mutex); rt5682_calibrate(rt5682); rt5682_apply_patch_list(rt5682, &i2c->dev); diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 415ec564c82e224c33a85fd92ada9b620d1b09c2..0a0ec4a021e111785b34875b8fbadbbaf2608343 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -922,15 +922,13 @@ static void rt5682_enable_push_button_irq(struct snd_soc_component *component, * * Returns detect status. */ -int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) +static int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) { struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component); struct snd_soc_dapm_context *dapm = &component->dapm; unsigned int val, count; if (jack_insert) { - snd_soc_dapm_mutex_lock(dapm); - snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, RT5682_PWR_VREF2 | RT5682_PWR_MB, RT5682_PWR_VREF2 | RT5682_PWR_MB); @@ -981,8 +979,6 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) snd_soc_component_update_bits(component, RT5682_MICBIAS_2, RT5682_PWR_CLK25M_MASK | RT5682_PWR_CLK1M_MASK, RT5682_PWR_CLK25M_PU | RT5682_PWR_CLK1M_PU); - - snd_soc_dapm_mutex_unlock(dapm); } else { rt5682_enable_push_button_irq(component, false); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, @@ -1011,7 +1007,6 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) dev_dbg(component->dev, "jack_type = %d\n", rt5682->jack_type); return rt5682->jack_type; } -EXPORT_SYMBOL_GPL(rt5682_headset_detect); static int rt5682_set_jack_detect(struct snd_soc_component *component, struct snd_soc_jack *hs_jack, void *data) @@ -1094,6 +1089,7 @@ void rt5682_jack_detect_handler(struct work_struct *work) { struct rt5682_priv *rt5682 = container_of(work, struct rt5682_priv, jack_detect_work.work); + struct snd_soc_dapm_context *dapm; int val, btn_type; while (!rt5682->component) @@ -1102,7 +1098,9 @@ void rt5682_jack_detect_handler(struct work_struct *work) while (!rt5682->component->card->instantiated) usleep_range(10000, 15000); - mutex_lock(&rt5682->jdet_mutex); + dapm = snd_soc_component_get_dapm(rt5682->component); + + snd_soc_dapm_mutex_lock(dapm); mutex_lock(&rt5682->calibrate_mutex); val = snd_soc_component_read(rt5682->component, RT5682_AJD1_CTRL) @@ -1162,6 +1160,9 @@ void rt5682_jack_detect_handler(struct work_struct *work) rt5682->irq_work_delay_time = 50; } + mutex_unlock(&rt5682->calibrate_mutex); + snd_soc_dapm_mutex_unlock(dapm); + snd_soc_jack_report(rt5682->hs_jack, rt5682->jack_type, 
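/*
 * In rt5682_jd_check_handler() above, the jack-out branch now kicks
 * the main jack_detect_work with mod_delayed_work() instead of
 * reporting inline, so all state reporting funnels through a single
 * worker; mod_delayed_work() also replaces an already-pending
 * instance rather than queueing a second one.  A reduced sketch of
 * that shape (handler body and names are illustrative):
 */
#include <linux/jiffies.h>
#include <linux/types.h>
#include <linux/workqueue.h>

static void detect_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(detect_work, detect_fn);

static void detect_fn(struct work_struct *work)
{
	/* the one place that reads the hardware and reports jack state */
}

static void jd_poll(bool jack_out)
{
	if (jack_out)
		/* run now, superseding any queued run */
		mod_delayed_work(system_power_efficient_wq, &detect_work, 0);
	else
		/* keep polling; no-op if already queued */
		schedule_delayed_work(&detect_work, msecs_to_jiffies(500));
}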
SND_JACK_HEADSET | SND_JACK_BTN_0 | SND_JACK_BTN_1 | @@ -1174,9 +1175,6 @@ void rt5682_jack_detect_handler(struct work_struct *work) else cancel_delayed_work_sync(&rt5682->jd_check_work); } - - mutex_unlock(&rt5682->calibrate_mutex); - mutex_unlock(&rt5682->jdet_mutex); } EXPORT_SYMBOL_GPL(rt5682_jack_detect_handler); @@ -1526,7 +1524,6 @@ static int rt5682_hp_event(struct snd_soc_dapm_widget *w, { struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); - struct rt5682_priv *rt5682 = snd_soc_component_get_drvdata(component); switch (event) { case SND_SOC_DAPM_PRE_PMU: @@ -1538,17 +1535,12 @@ static int rt5682_hp_event(struct snd_soc_dapm_widget *w, RT5682_DEPOP_1, 0x60, 0x60); snd_soc_component_update_bits(component, RT5682_DAC_ADC_DIG_VOL1, 0x00c0, 0x0080); - - mutex_lock(&rt5682->jdet_mutex); - snd_soc_component_update_bits(component, RT5682_HP_CTRL_2, RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN, RT5682_HP_C2_DAC_L_EN | RT5682_HP_C2_DAC_R_EN); usleep_range(5000, 10000); snd_soc_component_update_bits(component, RT5682_CHARGE_PUMP_1, RT5682_CP_SW_SIZE_MASK, RT5682_CP_SW_SIZE_L); - - mutex_unlock(&rt5682->jdet_mutex); break; case SND_SOC_DAPM_POST_PMD: diff --git a/sound/soc/codecs/rt5682.h b/sound/soc/codecs/rt5682.h index c917c76200ea2b44a68948b99f676c2ab613dd18..52ff0d9c36c5817d36d0b4f89a77128d47096d15 100644 --- a/sound/soc/codecs/rt5682.h +++ b/sound/soc/codecs/rt5682.h @@ -1463,7 +1463,6 @@ struct rt5682_priv { int jack_type; int irq_work_delay_time; - struct mutex jdet_mutex; }; extern const char *rt5682_supply_names[RT5682_NUM_SUPPLIES]; @@ -1473,7 +1472,6 @@ int rt5682_sel_asrc_clk_src(struct snd_soc_component *component, void rt5682_apply_patch_list(struct rt5682_priv *rt5682, struct device *dev); -int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert); void rt5682_jack_detect_handler(struct work_struct *work); bool rt5682_volatile_register(struct device *dev, unsigned int reg); diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c index eff200a07d9f4afe8cfc22a9c6a215a2a97f18e9..36cbc66914f90ab651e6998ea0b4bd79075d94e5 100644 --- a/sound/soc/codecs/wcd938x.c +++ b/sound/soc/codecs/wcd938x.c @@ -1432,14 +1432,10 @@ static int wcd938x_sdw_connect_port(struct wcd938x_sdw_ch_info *ch_info, return 0; } -static int wcd938x_connect_port(struct wcd938x_sdw_priv *wcd, u8 ch_id, u8 enable) +static int wcd938x_connect_port(struct wcd938x_sdw_priv *wcd, u8 port_num, u8 ch_id, u8 enable) { - u8 port_num; - - port_num = wcd->ch_info[ch_id].port_num; - return wcd938x_sdw_connect_port(&wcd->ch_info[ch_id], - &wcd->port_config[port_num], + &wcd->port_config[port_num - 1], enable); } @@ -2563,7 +2559,7 @@ static int wcd938x_ear_pa_put_gain(struct snd_kcontrol *kcontrol, WCD938X_EAR_GAIN_MASK, ucontrol->value.integer.value[0]); - return 0; + return 1; } static int wcd938x_get_compander(struct snd_kcontrol *kcontrol, @@ -2593,6 +2589,7 @@ static int wcd938x_set_compander(struct snd_kcontrol *kcontrol, struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component); struct wcd938x_sdw_priv *wcd; int value = ucontrol->value.integer.value[0]; + int portidx; struct soc_mixer_control *mc; bool hphr; @@ -2606,12 +2603,14 @@ static int wcd938x_set_compander(struct snd_kcontrol *kcontrol, else wcd938x->comp1_enable = value; + portidx = wcd->ch_info[mc->reg].port_num; + if (value) - wcd938x_connect_port(wcd, mc->reg, true); + wcd938x_connect_port(wcd, portidx, mc->reg, true); else - wcd938x_connect_port(wcd, mc->reg, false); + 
wcd938x_connect_port(wcd, portidx, mc->reg, false); - return 0; + return 1; } static int wcd938x_ldoh_get(struct snd_kcontrol *kcontrol, @@ -2882,9 +2881,11 @@ static int wcd938x_get_swr_port(struct snd_kcontrol *kcontrol, struct wcd938x_sdw_priv *wcd; struct soc_mixer_control *mixer = (struct soc_mixer_control *)kcontrol->private_value; int dai_id = mixer->shift; - int portidx = mixer->reg; + int portidx, ch_idx = mixer->reg; + wcd = wcd938x->sdw_priv[dai_id]; + portidx = wcd->ch_info[ch_idx].port_num; ucontrol->value.integer.value[0] = wcd->port_enable[portidx]; @@ -2899,12 +2900,14 @@ static int wcd938x_set_swr_port(struct snd_kcontrol *kcontrol, struct wcd938x_sdw_priv *wcd; struct soc_mixer_control *mixer = (struct soc_mixer_control *)kcontrol->private_value; - int portidx = mixer->reg; + int ch_idx = mixer->reg; + int portidx; int dai_id = mixer->shift; bool enable; wcd = wcd938x->sdw_priv[dai_id]; + portidx = wcd->ch_info[ch_idx].port_num; if (ucontrol->value.integer.value[0]) enable = true; else @@ -2912,9 +2915,9 @@ static int wcd938x_set_swr_port(struct snd_kcontrol *kcontrol, wcd->port_enable[portidx] = enable; - wcd938x_connect_port(wcd, portidx, enable); + wcd938x_connect_port(wcd, portidx, ch_idx, enable); - return 0; + return 1; } diff --git a/sound/soc/fsl/pcm030-audio-fabric.c b/sound/soc/fsl/pcm030-audio-fabric.c index af3c3b90c0acab59c88d0050838edcc23a848c0b..83b4a22bf15ac0b8883f3144c55f34d5959f6012 100644 --- a/sound/soc/fsl/pcm030-audio-fabric.c +++ b/sound/soc/fsl/pcm030-audio-fabric.c @@ -93,16 +93,21 @@ static int pcm030_fabric_probe(struct platform_device *op) dev_err(&op->dev, "platform_device_alloc() failed\n"); ret = platform_device_add(pdata->codec_device); - if (ret) + if (ret) { dev_err(&op->dev, "platform_device_add() failed: %d\n", ret); + platform_device_put(pdata->codec_device); + } ret = snd_soc_register_card(card); - if (ret) + if (ret) { dev_err(&op->dev, "snd_soc_register_card() failed: %d\n", ret); + platform_device_del(pdata->codec_device); + platform_device_put(pdata->codec_device); + } platform_set_drvdata(op, pdata); - return ret; + } static int pcm030_fabric_remove(struct platform_device *op) diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c index a89d1cfdda327e3c5d115625008a488e0f1b26ff..78419e18717d65d7bfb56033be72eb2c1629691b 100644 --- a/sound/soc/generic/simple-card.c +++ b/sound/soc/generic/simple-card.c @@ -28,6 +28,30 @@ static const struct snd_soc_ops simple_ops = { .hw_params = asoc_simple_hw_params, }; +static int asoc_simple_parse_platform(struct device_node *node, + struct snd_soc_dai_link_component *dlc) +{ + struct of_phandle_args args; + int ret; + + if (!node) + return 0; + + /* + * Get node via "sound-dai = <&phandle port>" + * it will be used as xxx_of_node on soc_bind_dai_link() + */ + ret = of_parse_phandle_with_args(node, DAI, CELL, 0, &args); + if (ret) + return ret; + + /* dai_name is not required and may not exist for plat component */ + + dlc->of_node = args.np; + + return 0; +} + static int asoc_simple_parse_dai(struct device_node *node, struct snd_soc_dai_link_component *dlc, int *is_single_link) @@ -289,7 +313,7 @@ static int simple_dai_link_of(struct asoc_simple_priv *priv, if (ret < 0) goto dai_link_of_err; - ret = asoc_simple_parse_dai(plat, platforms, NULL); + ret = asoc_simple_parse_platform(plat, platforms); if (ret < 0) goto dai_link_of_err; diff --git a/sound/soc/mediatek/Kconfig b/sound/soc/mediatek/Kconfig index 
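/*
 * Several put handlers in this series (max9759, wcd938x, the soc-ops
 * helpers further on) gain the same two fixes: reject out-of-range
 * input with -EINVAL instead of silently masking it, and return 1
 * rather than 0 when the control changed, which is what makes ALSA
 * core send a change notification to user space.  Minimal shape of
 * such a handler; the register, mask, and 0..3 range are illustrative:
 */
#include <sound/soc.h>

static int example_gain_put(struct snd_kcontrol *kcontrol,
			    struct snd_ctl_elem_value *ucontrol)
{
	struct snd_soc_component *c = snd_soc_kcontrol_component(kcontrol);
	long val = ucontrol->value.integer.value[0];

	if (val < 0 || val > 3)
		return -EINVAL;	/* reject, don't clamp or wrap */

	/* update_bits() returns 1 if the register value changed */
	return snd_soc_component_update_bits(c, 0x10, 0x3, val);
}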
9306b7ca2644248a547fec5d31e593de8384a375..0d154350f180e605468ba115a83fb3aca8d151b9 100644 --- a/sound/soc/mediatek/Kconfig +++ b/sound/soc/mediatek/Kconfig @@ -216,7 +216,7 @@ config SND_SOC_MT8195_MT6359_RT1019_RT5682 config SND_SOC_MT8195_MT6359_RT1011_RT5682 tristate "ASoC Audio driver for MT8195 with MT6359 RT1011 RT5682 codec" - depends on I2C + depends on I2C && GPIOLIB depends on SND_SOC_MT8195 && MTK_PMIC_WRAP select SND_SOC_MT6359 select SND_SOC_RT1011 diff --git a/sound/soc/qcom/qdsp6/q6apm-dai.c b/sound/soc/qcom/qdsp6/q6apm-dai.c index eb1c3aec479ba4ddd925e22955ace2176e2dfec9..19c4a90ec1ea96e793f00016c0dc03040428fcfe 100644 --- a/sound/soc/qcom/qdsp6/q6apm-dai.c +++ b/sound/soc/qcom/qdsp6/q6apm-dai.c @@ -308,8 +308,11 @@ static int q6apm_dai_close(struct snd_soc_component *component, struct snd_pcm_runtime *runtime = substream->runtime; struct q6apm_dai_rtd *prtd = runtime->private_data; - q6apm_graph_stop(prtd->graph); - q6apm_unmap_memory_regions(prtd->graph, substream->stream); + if (prtd->state) { /* only stop graph that is started */ + q6apm_graph_stop(prtd->graph); + q6apm_unmap_memory_regions(prtd->graph, substream->stream); + } + q6apm_graph_close(prtd->graph); prtd->graph = NULL; kfree(prtd); diff --git a/sound/soc/soc-acpi.c b/sound/soc/soc-acpi.c index cbd7ea48837b246ddcface067b0eff746fec59da..142476f1396ff42646728906423a429f38439160 100644 --- a/sound/soc/soc-acpi.c +++ b/sound/soc/soc-acpi.c @@ -55,16 +55,13 @@ EXPORT_SYMBOL_GPL(snd_soc_acpi_find_machine); static acpi_status snd_soc_acpi_find_package(acpi_handle handle, u32 level, void *context, void **ret) { - struct acpi_device *adev; + struct acpi_device *adev = acpi_fetch_acpi_dev(handle); acpi_status status; struct snd_soc_acpi_package_context *pkg_ctx = context; pkg_ctx->data_valid = false; - if (acpi_bus_get_device(handle, &adev)) - return AE_OK; - - if (adev->status.present && adev->status.functional) { + if (adev && adev->status.present && adev->status.functional) { struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; union acpi_object *myobj = NULL; diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 08eaa9ddf191e7adee120c3e7e46fe055e16ba11..9833611b83d1fc638f861a239be7b8ce38fb1915 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -316,13 +316,27 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, if (sign_bit) mask = BIT(sign_bit + 1) - 1; - val = ((ucontrol->value.integer.value[0] + min) & mask); + if (ucontrol->value.integer.value[0] < 0) + return -EINVAL; + val = ucontrol->value.integer.value[0]; + if (mc->platform_max && val > mc->platform_max) + return -EINVAL; + if (val > max - min) + return -EINVAL; + val = (val + min) & mask; if (invert) val = max - val; val_mask = mask << shift; val = val << shift; if (snd_soc_volsw_is_stereo(mc)) { - val2 = ((ucontrol->value.integer.value[1] + min) & mask); + if (ucontrol->value.integer.value[1] < 0) + return -EINVAL; + val2 = ucontrol->value.integer.value[1]; + if (mc->platform_max && val2 > mc->platform_max) + return -EINVAL; + if (val2 > max - min) + return -EINVAL; + val2 = (val2 + min) & mask; if (invert) val2 = max - val2; if (reg == reg2) { @@ -409,8 +423,15 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, int err = 0; unsigned int val, val_mask; + if (ucontrol->value.integer.value[0] < 0) + return -EINVAL; + val = ucontrol->value.integer.value[0]; + if (mc->platform_max && val > mc->platform_max) + return -EINVAL; + if (val > max - min) + return -EINVAL; val_mask = mask << shift; - val = 
(ucontrol->value.integer.value[0] + min) & mask; + val = (val + min) & mask; val = val << shift; err = snd_soc_component_update_bits(component, reg, val_mask, val); @@ -858,6 +879,8 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol, long val = ucontrol->value.integer.value[0]; unsigned int i; + if (val < mc->min || val > mc->max) + return -EINVAL; if (invert) val = max - val; val &= mask; diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index 7abfc48b26ca51997b8cffeb567392f8729f0701..9a954680d492890133dfcb3ecb3e4fe4433262a3 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -46,8 +46,8 @@ static inline void snd_soc_dpcm_stream_lock_irq(struct snd_soc_pcm_runtime *rtd, snd_pcm_stream_lock_irq(snd_soc_dpcm_get_substream(rtd, stream)); } -#define snd_soc_dpcm_stream_lock_irqsave(rtd, stream, flags) \ - snd_pcm_stream_lock_irqsave(snd_soc_dpcm_get_substream(rtd, stream), flags) +#define snd_soc_dpcm_stream_lock_irqsave_nested(rtd, stream, flags) \ + snd_pcm_stream_lock_irqsave_nested(snd_soc_dpcm_get_substream(rtd, stream), flags) static inline void snd_soc_dpcm_stream_unlock_irq(struct snd_soc_pcm_runtime *rtd, int stream) @@ -1268,6 +1268,7 @@ static void dpcm_be_reparent(struct snd_soc_pcm_runtime *fe, void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream) { struct snd_soc_dpcm *dpcm, *d; + LIST_HEAD(deleted_dpcms); snd_soc_dpcm_mutex_assert_held(fe); @@ -1287,13 +1288,18 @@ void dpcm_be_disconnect(struct snd_soc_pcm_runtime *fe, int stream) /* BEs still alive need new FE */ dpcm_be_reparent(fe, dpcm->be, stream); - dpcm_remove_debugfs_state(dpcm); - list_del(&dpcm->list_be); + list_move(&dpcm->list_fe, &deleted_dpcms); + } + snd_soc_dpcm_stream_unlock_irq(fe, stream); + + while (!list_empty(&deleted_dpcms)) { + dpcm = list_first_entry(&deleted_dpcms, struct snd_soc_dpcm, + list_fe); list_del(&dpcm->list_fe); + dpcm_remove_debugfs_state(dpcm); kfree(dpcm); } - snd_soc_dpcm_stream_unlock_irq(fe, stream); } /* get BE for DAI widget and stream */ @@ -2094,7 +2100,7 @@ int dpcm_be_dai_trigger(struct snd_soc_pcm_runtime *fe, int stream, be = dpcm->be; be_substream = snd_soc_dpcm_get_substream(be, stream); - snd_soc_dpcm_stream_lock_irqsave(be, stream, flags); + snd_soc_dpcm_stream_lock_irqsave_nested(be, stream, flags); /* is this op for this BE ? 
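/*
 * dpcm_be_disconnect() above is restructured so the stream spinlock
 * only covers unlinking: victims are first collected onto a local
 * list with list_move(), and debugfs teardown plus kfree() happen
 * after the lock is dropped.  The generic form of that idiom, with
 * illustrative types:
 */
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct item {
	struct list_head node;
	bool dead;
};

static void prune(struct list_head *all, spinlock_t *lock)
{
	struct item *it, *tmp;
	LIST_HEAD(doomed);

	spin_lock(lock);
	list_for_each_entry_safe(it, tmp, all, node)
		if (it->dead)
			list_move(&it->node, &doomed);	/* unlink only */
	spin_unlock(lock);

	/* slow or sleeping teardown is now safe: no lock held */
	list_for_each_entry_safe(it, tmp, &doomed, node) {
		list_del(&it->node);
		kfree(it);
	}
}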
*/ if (!snd_soc_dpcm_be_can_update(fe, be, stream)) diff --git a/sound/soc/xilinx/xlnx_formatter_pcm.c b/sound/soc/xilinx/xlnx_formatter_pcm.c index 91afea9d5de6787906ac5e817ba3406488fa3f58..ce19a6058b27964dfcd45336770b2e7cdd417cec 100644 --- a/sound/soc/xilinx/xlnx_formatter_pcm.c +++ b/sound/soc/xilinx/xlnx_formatter_pcm.c @@ -37,6 +37,7 @@ #define XLNX_AUD_XFER_COUNT 0x28 #define XLNX_AUD_CH_STS_START 0x2C #define XLNX_BYTES_PER_CH 0x44 +#define XLNX_AUD_ALIGN_BYTES 64 #define AUD_STS_IOC_IRQ_MASK BIT(31) #define AUD_STS_CH_STS_MASK BIT(29) @@ -368,12 +369,32 @@ static int xlnx_formatter_pcm_open(struct snd_soc_component *component, snd_soc_set_runtime_hwparams(substream, &xlnx_pcm_hardware); runtime->private_data = stream_data; - /* Resize the period size divisible by 64 */ + /* Resize the period bytes as divisible by 64 */ err = snd_pcm_hw_constraint_step(runtime, 0, - SNDRV_PCM_HW_PARAM_PERIOD_BYTES, 64); + SNDRV_PCM_HW_PARAM_PERIOD_BYTES, + XLNX_AUD_ALIGN_BYTES); if (err) { dev_err(component->dev, - "unable to set constraint on period bytes\n"); + "Unable to set constraint on period bytes\n"); + return err; + } + + /* Resize the buffer bytes as divisible by 64 */ + err = snd_pcm_hw_constraint_step(runtime, 0, + SNDRV_PCM_HW_PARAM_BUFFER_BYTES, + XLNX_AUD_ALIGN_BYTES); + if (err) { + dev_err(component->dev, + "Unable to set constraint on buffer bytes\n"); + return err; + } + + /* Set periods as integer multiple */ + err = snd_pcm_hw_constraint_integer(runtime, + SNDRV_PCM_HW_PARAM_PERIODS); + if (err < 0) { + dev_err(component->dev, + "Unable to set constraint on periods to be integer\n"); return err; } diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index e8f3f8d622ec50d647651c56c6872f6337fd7f3d..630766ba259fddd580a51534608b7f689d0bafc8 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1527,6 +1527,10 @@ error: usb_audio_err(chip, "cannot get connectors status: req = %#x, wValue = %#x, wIndex = %#x, type = %d\n", UAC_GET_CUR, validx, idx, cval->val_type); + + if (val) + *val = 0; + return filter_error(cval, ret); } diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index b1522e43173e1b947011a70691530aa8a00a1d82..0ea39565e62329c98bf283bcff25826c0fd0a11b 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -84,7 +84,7 @@ * combination. 
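/*
 * The xlnx_formatter_pcm.c hunk above extends the existing 64-byte
 * period constraint to the buffer size and forces an integer number
 * of periods per buffer, so every DMA transfer stays aligned.  The
 * same three constraints, condensed into one helper an .open callback
 * could call (the 64-byte alignment is this driver's requirement):
 */
#include <sound/pcm.h>

static int constrain_dma_alignment(struct snd_pcm_runtime *runtime)
{
	int err;

	err = snd_pcm_hw_constraint_step(runtime, 0,
					 SNDRV_PCM_HW_PARAM_PERIOD_BYTES, 64);
	if (err)
		return err;

	err = snd_pcm_hw_constraint_step(runtime, 0,
					 SNDRV_PCM_HW_PARAM_BUFFER_BYTES, 64);
	if (err)
		return err;

	/* buffer must hold a whole number of periods */
	err = snd_pcm_hw_constraint_integer(runtime,
					    SNDRV_PCM_HW_PARAM_PERIODS);
	return err < 0 ? err : 0;
}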
*/ { - USB_DEVICE(0x041e, 0x4095), + USB_AUDIO_DEVICE(0x041e, 0x4095), .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { .ifnum = QUIRK_ANY_INTERFACE, .type = QUIRK_COMPOSITE, diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 18de5f76f19856b11be223e72bce1142d68e1e28..6db4e2932b3d865785f179f5486106803b13629e 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -299,7 +299,9 @@ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ #define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 2da3316bb55959dad6b400e70b06628c4de0201b..bf6e96011dfed4e2e8582e511c08b5b6cbd723b6 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -452,6 +452,9 @@ struct kvm_sync_regs { #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 +/* attributes for system fd (group 0) */ +#define KVM_X86_XCOMP_GUEST_SUPP 0 + struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; diff --git a/tools/arch/x86/include/uapi/asm/prctl.h b/tools/arch/x86/include/uapi/asm/prctl.h index 754a078568171451b1499147d3050500e0c8377e..500b96e71f1868dd097a2a796c8da89b5f85bed3 100644 --- a/tools/arch/x86/include/uapi/asm/prctl.h +++ b/tools/arch/x86/include/uapi/asm/prctl.h @@ -2,20 +2,22 @@ #ifndef _ASM_X86_PRCTL_H #define _ASM_X86_PRCTL_H -#define ARCH_SET_GS 0x1001 -#define ARCH_SET_FS 0x1002 -#define ARCH_GET_FS 0x1003 -#define ARCH_GET_GS 0x1004 +#define ARCH_SET_GS 0x1001 +#define ARCH_SET_FS 0x1002 +#define ARCH_GET_FS 0x1003 +#define ARCH_GET_GS 0x1004 -#define ARCH_GET_CPUID 0x1011 -#define ARCH_SET_CPUID 0x1012 +#define ARCH_GET_CPUID 0x1011 +#define ARCH_SET_CPUID 0x1012 -#define ARCH_GET_XCOMP_SUPP 0x1021 -#define ARCH_GET_XCOMP_PERM 0x1022 -#define ARCH_REQ_XCOMP_PERM 0x1023 +#define ARCH_GET_XCOMP_SUPP 0x1021 +#define ARCH_GET_XCOMP_PERM 0x1022 +#define ARCH_REQ_XCOMP_PERM 0x1023 +#define ARCH_GET_XCOMP_GUEST_PERM 0x1024 +#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 -#define ARCH_MAP_VDSO_X32 0x2001 -#define ARCH_MAP_VDSO_32 0x2002 -#define ARCH_MAP_VDSO_64 0x2003 +#define ARCH_MAP_VDSO_X32 0x2001 +#define ARCH_MAP_VDSO_32 0x2002 +#define ARCH_MAP_VDSO_64 0x2003 #endif /* _ASM_X86_PRCTL_H */ diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index 9ddeca947635427323b5480f67baeb39c862eaef..320a88ac28c996402fc5c609ec7ac1c2b2ca2314 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -9,7 +9,11 @@ ifeq ($(V),1) msg = else Q = @ - msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; + ifeq ($(silent),1) + msg = + else + msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; + endif MAKEFLAGS=--no-print-directory endif diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c index 
ab9353f2fd46ab1c127347b2e585e69aa97ea92e..9a5c1f008fe60302d36df1fcb259b14767011b57 100644 --- a/tools/bpf/runqslower/runqslower.bpf.c +++ b/tools/bpf/runqslower/runqslower.bpf.c @@ -68,7 +68,7 @@ int handle__sched_switch(u64 *ctx) */ struct task_struct *prev = (struct task_struct *)ctx[1]; struct task_struct *next = (struct task_struct *)ctx[2]; - struct event event = {}; + struct runq_event event = {}; u64 *tsp, delta_us; long state; u32 pid; diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c index 2414cc764461c8d99b5eaf6a82eb266aca307903..d78f4148597f0f5cc77ae41fe4d73ba691ec34ec 100644 --- a/tools/bpf/runqslower/runqslower.c +++ b/tools/bpf/runqslower/runqslower.c @@ -100,7 +100,7 @@ static int bump_memlock_rlimit(void) void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) { - const struct event *e = data; + const struct runq_event *e = data; struct tm *tm; char ts[32]; time_t t; diff --git a/tools/bpf/runqslower/runqslower.h b/tools/bpf/runqslower/runqslower.h index 9db225425e5ff900d23dda240a5802d775ffa83a..4f70f07200c23d40637128f82f9efed838ec67f3 100644 --- a/tools/bpf/runqslower/runqslower.h +++ b/tools/bpf/runqslower/runqslower.h @@ -4,7 +4,7 @@ #define TASK_COMM_LEN 16 -struct event { +struct runq_event { char task[TASK_COMM_LEN]; __u64 delta_us; pid_t pid; diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h index 5d2ab38965ccae1f4b91dea6e7b99d103be26d89..9ab313e93555addd2ab7a57702c4eb4fa5e31325 100644 --- a/tools/include/asm-generic/bitops.h +++ b/tools/include/asm-generic/bitops.h @@ -18,7 +18,6 @@ #include #include #include -#include #ifndef _TOOLS_LINUX_BITOPS_H_ #error only can be included directly diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/asm-generic/bitops/find.h deleted file mode 100644 index 6481fd11012a9e471ae305d1627a72f7ffea8414..0000000000000000000000000000000000000000 --- a/tools/include/asm-generic/bitops/find.h +++ /dev/null @@ -1,145 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ -#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ - -extern unsigned long _find_next_bit(const unsigned long *addr1, - const unsigned long *addr2, unsigned long nbits, - unsigned long start, unsigned long invert, unsigned long le); -extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); -extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); -extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size); - -#ifndef find_next_bit -/** - * find_next_bit - find the next set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The bitmap size in bits - * - * Returns the bit number for the next set bit - * If no bits are set, returns @size. - */ -static inline -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - if (small_const_nbits(size)) { - unsigned long val; - - if (unlikely(offset >= size)) - return size; - - val = *addr & GENMASK(size - 1, offset); - return val ? 
__ffs(val) : size; - } - - return _find_next_bit(addr, NULL, size, offset, 0UL, 0); -} -#endif - -#ifndef find_next_and_bit -/** - * find_next_and_bit - find the next set bit in both memory regions - * @addr1: The first address to base the search on - * @addr2: The second address to base the search on - * @offset: The bitnumber to start searching at - * @size: The bitmap size in bits - * - * Returns the bit number for the next set bit - * If no bits are set, returns @size. - */ -static inline -unsigned long find_next_and_bit(const unsigned long *addr1, - const unsigned long *addr2, unsigned long size, - unsigned long offset) -{ - if (small_const_nbits(size)) { - unsigned long val; - - if (unlikely(offset >= size)) - return size; - - val = *addr1 & *addr2 & GENMASK(size - 1, offset); - return val ? __ffs(val) : size; - } - - return _find_next_bit(addr1, addr2, size, offset, 0UL, 0); -} -#endif - -#ifndef find_next_zero_bit -/** - * find_next_zero_bit - find the next cleared bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The bitmap size in bits - * - * Returns the bit number of the next zero bit - * If no bits are zero, returns @size. - */ -static inline -unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - if (small_const_nbits(size)) { - unsigned long val; - - if (unlikely(offset >= size)) - return size; - - val = *addr | ~GENMASK(size - 1, offset); - return val == ~0UL ? size : ffz(val); - } - - return _find_next_bit(addr, NULL, size, offset, ~0UL, 0); -} -#endif - -#ifndef find_first_bit - -/** - * find_first_bit - find the first set bit in a memory region - * @addr: The address to start the search at - * @size: The maximum number of bits to search - * - * Returns the bit number of the first set bit. - * If no bits are set, returns @size. - */ -static inline -unsigned long find_first_bit(const unsigned long *addr, unsigned long size) -{ - if (small_const_nbits(size)) { - unsigned long val = *addr & GENMASK(size - 1, 0); - - return val ? __ffs(val) : size; - } - - return _find_first_bit(addr, size); -} - -#endif /* find_first_bit */ - -#ifndef find_first_zero_bit - -/** - * find_first_zero_bit - find the first cleared bit in a memory region - * @addr: The address to start the search at - * @size: The maximum number of bits to search - * - * Returns the bit number of the first cleared bit. - * If no bits are zero, returns @size. - */ -static inline -unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) -{ - if (small_const_nbits(size)) { - unsigned long val = *addr | ~GENMASK(size - 1, 0); - - return val == ~0UL ? 
size : ffz(val); - } - - return _find_first_zero_bit(addr, size); -} -#endif - -#endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */ diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 95611df1d26e52835baa7bae372b34d4756d3866..ea97804d04d4ac9ef6818d35cea88b1404c75c5b 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -1,9 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PERF_BITOPS_H -#define _PERF_BITOPS_H +#ifndef _TOOLS_LINUX_BITMAP_H +#define _TOOLS_LINUX_BITMAP_H #include #include +#include #include #include @@ -181,4 +182,4 @@ static inline int bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } -#endif /* _PERF_BITOPS_H */ +#endif /* _TOOLS_LINUX_BITMAP_H */ diff --git a/include/asm-generic/bitops/find.h b/tools/include/linux/find.h similarity index 83% rename from include/asm-generic/bitops/find.h rename to tools/include/linux/find.h index 0d132ee2a291335516b20defa2e893c90a551a52..47e2bd6c51745d4942f4121dc58004f4d94c0aa5 100644 --- a/include/asm-generic/bitops/find.h +++ b/tools/include/linux/find.h @@ -1,11 +1,19 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_GENERIC_BITOPS_FIND_H_ -#define _ASM_GENERIC_BITOPS_FIND_H_ +#ifndef _TOOLS_LINUX_FIND_H_ +#define _TOOLS_LINUX_FIND_H_ + +#ifndef _TOOLS_LINUX_BITMAP_H +#error tools: only can be included directly +#endif + +#include extern unsigned long _find_next_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start, unsigned long invert, unsigned long le); extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); +extern unsigned long _find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long size); extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size); @@ -95,8 +103,7 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, } #endif -#ifdef CONFIG_GENERIC_FIND_FIRST_BIT - +#ifndef find_first_bit /** * find_first_bit - find the first set bit in a memory region * @addr: The address to start the search at @@ -116,7 +123,34 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) return _find_first_bit(addr, size); } +#endif +#ifndef find_first_and_bit +/** + * find_first_and_bit - find the first set bit in both memory regions + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * + * Returns the bit number for the next set bit + * If no bits are set, returns @size. + */ +static inline +unsigned long find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr1 & *addr2 & GENMASK(size - 1, 0); + + return val ? 
__ffs(val) : size; + } + + return _find_first_and_bit(addr1, addr2, size); +} +#endif + +#ifndef find_first_zero_bit /** * find_first_zero_bit - find the first cleared bit in a memory region * @addr: The address to start the search at @@ -136,16 +170,7 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) return _find_first_zero_bit(addr, size); } -#else /* CONFIG_GENERIC_FIND_FIRST_BIT */ - -#ifndef find_first_bit -#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) #endif -#ifndef find_first_zero_bit -#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) -#endif - -#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */ #ifndef find_last_bit /** @@ -185,4 +210,5 @@ extern unsigned long find_next_clump8(unsigned long *clump, #define find_first_clump8(clump, bits, size) \ find_next_clump8((clump), (bits), (size), 0) -#endif /*_ASM_GENERIC_BITOPS_FIND_H_ */ + +#endif /*__LINUX_FIND_H_ */ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 4557a8b6086f4ffe71eb65bba74effa9045aa681..1c48b0ae3ba30229ff3104b5c8258e1a70373475 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -883,8 +883,11 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease) #define __NR_futex_waitv 449 __SYSCALL(__NR_futex_waitv, sys_futex_waitv) +#define __NR_set_mempolicy_home_node 450 +__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) + #undef __NR_syscalls -#define __NR_syscalls 450 +#define __NR_syscalls 451 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index f066637ee206406aa599a8e7ca56f3526fc61915..5191b57e156220bd7fc6d85940c301e16cc1645a 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1131,7 +1131,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 #define KVM_CAP_ARM_MTE 205 #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 -#define KVM_CAP_XSAVE2 207 +#define KVM_CAP_VM_GPA_BITS 207 +#define KVM_CAP_XSAVE2 208 +#define KVM_CAP_SYS_ATTRIBUTES 209 #ifdef KVM_CAP_IRQ_ROUTING @@ -1163,11 +1165,20 @@ struct kvm_irq_routing_hv_sint { __u32 sint; }; +struct kvm_irq_routing_xen_evtchn { + __u32 port; + __u32 vcpu; + __u32 priority; +}; + +#define KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL ((__u32)(-1)) + /* gsi routing entry types */ #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 #define KVM_IRQ_ROUTING_HV_SINT 4 +#define KVM_IRQ_ROUTING_XEN_EVTCHN 5 struct kvm_irq_routing_entry { __u32 gsi; @@ -1179,6 +1190,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing_msi msi; struct kvm_irq_routing_s390_adapter adapter; struct kvm_irq_routing_hv_sint hv_sint; + struct kvm_irq_routing_xen_evtchn xen_evtchn; __u32 pad[8]; } u; }; @@ -1209,6 +1221,7 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) +#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) struct kvm_xen_hvm_config { __u32 flags; @@ -1552,8 +1565,6 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_XSAVE */ #define KVM_GET_XSAVE _IOR(KVMIO, 0xa4, struct kvm_xsave) #define KVM_SET_XSAVE _IOW(KVMIO, 0xa5, struct kvm_xsave) -/* Available with KVM_CAP_XSAVE2 */ -#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) /* Available with KVM_CAP_XCRS */ #define KVM_GET_XCRS 
_IOR(KVMIO, 0xa6, struct kvm_xcrs) #define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs) @@ -2034,4 +2045,7 @@ struct kvm_stats_desc { #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) +/* Available with KVM_CAP_XSAVE2 */ +#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + #endif /* __LINUX_KVM_H */ diff --git a/tools/include/uapi/linux/lirc.h b/tools/include/uapi/linux/lirc.h deleted file mode 100644 index 45fcbf99d72ec49fdf5efd0030876819e6b3de27..0000000000000000000000000000000000000000 --- a/tools/include/uapi/linux/lirc.h +++ /dev/null @@ -1,229 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * lirc.h - linux infrared remote control header file - * last modified 2010/07/13 by Jarod Wilson - */ - -#ifndef _LINUX_LIRC_H -#define _LINUX_LIRC_H - -#include -#include - -#define PULSE_BIT 0x01000000 -#define PULSE_MASK 0x00FFFFFF - -#define LIRC_MODE2_SPACE 0x00000000 -#define LIRC_MODE2_PULSE 0x01000000 -#define LIRC_MODE2_FREQUENCY 0x02000000 -#define LIRC_MODE2_TIMEOUT 0x03000000 - -#define LIRC_VALUE_MASK 0x00FFFFFF -#define LIRC_MODE2_MASK 0xFF000000 - -#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE) -#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE) -#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY) -#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT) - -#define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK) -#define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK) - -#define LIRC_IS_SPACE(val) (LIRC_MODE2(val) == LIRC_MODE2_SPACE) -#define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE) -#define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY) -#define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT) - -/* used heavily by lirc userspace */ -#define lirc_t int - -/*** lirc compatible hardware features ***/ - -#define LIRC_MODE2SEND(x) (x) -#define LIRC_SEND2MODE(x) (x) -#define LIRC_MODE2REC(x) ((x) << 16) -#define LIRC_REC2MODE(x) ((x) >> 16) - -#define LIRC_MODE_RAW 0x00000001 -#define LIRC_MODE_PULSE 0x00000002 -#define LIRC_MODE_MODE2 0x00000004 -#define LIRC_MODE_SCANCODE 0x00000008 -#define LIRC_MODE_LIRCCODE 0x00000010 - - -#define LIRC_CAN_SEND_RAW LIRC_MODE2SEND(LIRC_MODE_RAW) -#define LIRC_CAN_SEND_PULSE LIRC_MODE2SEND(LIRC_MODE_PULSE) -#define LIRC_CAN_SEND_MODE2 LIRC_MODE2SEND(LIRC_MODE_MODE2) -#define LIRC_CAN_SEND_LIRCCODE LIRC_MODE2SEND(LIRC_MODE_LIRCCODE) - -#define LIRC_CAN_SEND_MASK 0x0000003f - -#define LIRC_CAN_SET_SEND_CARRIER 0x00000100 -#define LIRC_CAN_SET_SEND_DUTY_CYCLE 0x00000200 -#define LIRC_CAN_SET_TRANSMITTER_MASK 0x00000400 - -#define LIRC_CAN_REC_RAW LIRC_MODE2REC(LIRC_MODE_RAW) -#define LIRC_CAN_REC_PULSE LIRC_MODE2REC(LIRC_MODE_PULSE) -#define LIRC_CAN_REC_MODE2 LIRC_MODE2REC(LIRC_MODE_MODE2) -#define LIRC_CAN_REC_SCANCODE LIRC_MODE2REC(LIRC_MODE_SCANCODE) -#define LIRC_CAN_REC_LIRCCODE LIRC_MODE2REC(LIRC_MODE_LIRCCODE) - -#define LIRC_CAN_REC_MASK LIRC_MODE2REC(LIRC_CAN_SEND_MASK) - -#define LIRC_CAN_SET_REC_CARRIER (LIRC_CAN_SET_SEND_CARRIER << 16) -#define LIRC_CAN_SET_REC_DUTY_CYCLE (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16) - -#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000 -#define LIRC_CAN_SET_REC_CARRIER_RANGE 0x80000000 -#define LIRC_CAN_GET_REC_RESOLUTION 0x20000000 -#define LIRC_CAN_SET_REC_TIMEOUT 0x10000000 -#define LIRC_CAN_SET_REC_FILTER 0x08000000 - -#define LIRC_CAN_MEASURE_CARRIER 0x02000000 -#define LIRC_CAN_USE_WIDEBAND_RECEIVER 0x04000000 - -#define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK) 
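/*
 * The find.h helpers earlier in this patch all share one fast path:
 * when the bitmap length is a compile-time constant that fits in a
 * single long (small_const_nbits), the search reduces to masking one
 * word and taking __ffs(), and the out-of-line _find_*_bit() call is
 * compiled away.  A user-space reduction of that fast path (a
 * GCC/Clang builtin stands in for __ffs):
 */
#include <limits.h>

#define BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)
/* low `n` bits set; caller guarantees 1 <= n <= BITS_PER_LONG */
#define LOW_MASK(n)	(~0UL >> (BITS_PER_LONG - (n)))

static unsigned long first_bit_small(unsigned long word, unsigned long size)
{
	unsigned long val = word & LOW_MASK(size);

	/* __builtin_ctzl(val): index of the lowest set bit, val != 0 */
	return val ? (unsigned long)__builtin_ctzl(val) : size;
}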
-#define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK) - -#define LIRC_CAN_NOTIFY_DECODE 0x01000000 - -/*** IOCTL commands for lirc driver ***/ - -#define LIRC_GET_FEATURES _IOR('i', 0x00000000, __u32) - -#define LIRC_GET_SEND_MODE _IOR('i', 0x00000001, __u32) -#define LIRC_GET_REC_MODE _IOR('i', 0x00000002, __u32) -#define LIRC_GET_REC_RESOLUTION _IOR('i', 0x00000007, __u32) - -#define LIRC_GET_MIN_TIMEOUT _IOR('i', 0x00000008, __u32) -#define LIRC_GET_MAX_TIMEOUT _IOR('i', 0x00000009, __u32) - -/* code length in bits, currently only for LIRC_MODE_LIRCCODE */ -#define LIRC_GET_LENGTH _IOR('i', 0x0000000f, __u32) - -#define LIRC_SET_SEND_MODE _IOW('i', 0x00000011, __u32) -#define LIRC_SET_REC_MODE _IOW('i', 0x00000012, __u32) -/* Note: these can reset the according pulse_width */ -#define LIRC_SET_SEND_CARRIER _IOW('i', 0x00000013, __u32) -#define LIRC_SET_REC_CARRIER _IOW('i', 0x00000014, __u32) -#define LIRC_SET_SEND_DUTY_CYCLE _IOW('i', 0x00000015, __u32) -#define LIRC_SET_TRANSMITTER_MASK _IOW('i', 0x00000017, __u32) - -/* - * when a timeout != 0 is set the driver will send a - * LIRC_MODE2_TIMEOUT data packet, otherwise LIRC_MODE2_TIMEOUT is - * never sent, timeout is disabled by default - */ -#define LIRC_SET_REC_TIMEOUT _IOW('i', 0x00000018, __u32) - -/* 1 enables, 0 disables timeout reports in MODE2 */ -#define LIRC_SET_REC_TIMEOUT_REPORTS _IOW('i', 0x00000019, __u32) - -/* - * if enabled from the next key press on the driver will send - * LIRC_MODE2_FREQUENCY packets - */ -#define LIRC_SET_MEASURE_CARRIER_MODE _IOW('i', 0x0000001d, __u32) - -/* - * to set a range use LIRC_SET_REC_CARRIER_RANGE with the - * lower bound first and later LIRC_SET_REC_CARRIER with the upper bound - */ -#define LIRC_SET_REC_CARRIER_RANGE _IOW('i', 0x0000001f, __u32) - -#define LIRC_SET_WIDEBAND_RECEIVER _IOW('i', 0x00000023, __u32) - -/* - * Return the recording timeout, which is either set by - * the ioctl LIRC_SET_REC_TIMEOUT or by the kernel after setting the protocols. - */ -#define LIRC_GET_REC_TIMEOUT _IOR('i', 0x00000024, __u32) - -/* - * struct lirc_scancode - decoded scancode with protocol for use with - * LIRC_MODE_SCANCODE - * - * @timestamp: Timestamp in nanoseconds using CLOCK_MONOTONIC when IR - * was decoded. - * @flags: should be 0 for transmit. When receiving scancodes, - * LIRC_SCANCODE_FLAG_TOGGLE or LIRC_SCANCODE_FLAG_REPEAT can be set - * depending on the protocol - * @rc_proto: see enum rc_proto - * @keycode: the translated keycode. Set to 0 for transmit. 
- * @scancode: the scancode received or to be sent - */ -struct lirc_scancode { - __u64 timestamp; - __u16 flags; - __u16 rc_proto; - __u32 keycode; - __u64 scancode; -}; - -/* Set if the toggle bit of rc-5 or rc-6 is enabled */ -#define LIRC_SCANCODE_FLAG_TOGGLE 1 -/* Set if this is a nec or sanyo repeat */ -#define LIRC_SCANCODE_FLAG_REPEAT 2 - -/** - * enum rc_proto - the Remote Controller protocol - * - * @RC_PROTO_UNKNOWN: Protocol not known - * @RC_PROTO_OTHER: Protocol known but proprietary - * @RC_PROTO_RC5: Philips RC5 protocol - * @RC_PROTO_RC5X_20: Philips RC5x 20 bit protocol - * @RC_PROTO_RC5_SZ: StreamZap variant of RC5 - * @RC_PROTO_JVC: JVC protocol - * @RC_PROTO_SONY12: Sony 12 bit protocol - * @RC_PROTO_SONY15: Sony 15 bit protocol - * @RC_PROTO_SONY20: Sony 20 bit protocol - * @RC_PROTO_NEC: NEC protocol - * @RC_PROTO_NECX: Extended NEC protocol - * @RC_PROTO_NEC32: NEC 32 bit protocol - * @RC_PROTO_SANYO: Sanyo protocol - * @RC_PROTO_MCIR2_KBD: RC6-ish MCE keyboard - * @RC_PROTO_MCIR2_MSE: RC6-ish MCE mouse - * @RC_PROTO_RC6_0: Philips RC6-0-16 protocol - * @RC_PROTO_RC6_6A_20: Philips RC6-6A-20 protocol - * @RC_PROTO_RC6_6A_24: Philips RC6-6A-24 protocol - * @RC_PROTO_RC6_6A_32: Philips RC6-6A-32 protocol - * @RC_PROTO_RC6_MCE: MCE (Philips RC6-6A-32 subtype) protocol - * @RC_PROTO_SHARP: Sharp protocol - * @RC_PROTO_XMP: XMP protocol - * @RC_PROTO_CEC: CEC protocol - * @RC_PROTO_IMON: iMon Pad protocol - * @RC_PROTO_RCMM12: RC-MM protocol 12 bits - * @RC_PROTO_RCMM24: RC-MM protocol 24 bits - * @RC_PROTO_RCMM32: RC-MM protocol 32 bits - */ -enum rc_proto { - RC_PROTO_UNKNOWN = 0, - RC_PROTO_OTHER = 1, - RC_PROTO_RC5 = 2, - RC_PROTO_RC5X_20 = 3, - RC_PROTO_RC5_SZ = 4, - RC_PROTO_JVC = 5, - RC_PROTO_SONY12 = 6, - RC_PROTO_SONY15 = 7, - RC_PROTO_SONY20 = 8, - RC_PROTO_NEC = 9, - RC_PROTO_NECX = 10, - RC_PROTO_NEC32 = 11, - RC_PROTO_SANYO = 12, - RC_PROTO_MCIR2_KBD = 13, - RC_PROTO_MCIR2_MSE = 14, - RC_PROTO_RC6_0 = 15, - RC_PROTO_RC6_6A_20 = 16, - RC_PROTO_RC6_6A_24 = 17, - RC_PROTO_RC6_6A_32 = 18, - RC_PROTO_RC6_MCE = 19, - RC_PROTO_SHARP = 20, - RC_PROTO_XMP = 21, - RC_PROTO_CEC = 22, - RC_PROTO_IMON = 23, - RC_PROTO_RCMM12 = 24, - RC_PROTO_RCMM24 = 25, - RC_PROTO_RCMM32 = 26, -}; - -#endif diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 4cd39aaccbe7b90491137ec2d9381b8b042ba5f4..1b65042ab1db8df48b3daa7940756e1d0e132abe 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1332,9 +1332,9 @@ union perf_mem_data_src { /* hop level */ #define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ -#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ -#define PERF_MEM_HOPS_3 0x04 /* remote board */ +#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ +#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ +#define PERF_MEM_HOPS_3 0x04 /* remote board */ /* 5-7 available */ #define PERF_MEM_HOPS_SHIFT 43 diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index bb73e9a0b24fc142764f2376eb57d8cabfb84e1a..e998764f0262522903f04101f945a73a2f724a36 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -272,4 +272,7 @@ struct prctl_mm_map { # define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1 # define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2 +#define PR_SET_VMA 0x53564d41 +# define PR_SET_VMA_ANON_NAME 0 + #endif /* _LINUX_PRCTL_H */ diff --git 
a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index 5fbb79e30819af917e5e9f5dc39994522b3ac46e..2d3e5df39a59eeec5edaed0754bf2dfe798febfe 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -56,8 +56,10 @@ * * ****************************************************************************/ +#define AES_IEC958_STATUS_SIZE 24 + struct snd_aes_iec958 { - unsigned char status[24]; /* AES/IEC958 channel status bits */ + unsigned char status[AES_IEC958_STATUS_SIZE]; /* AES/IEC958 channel status bits */ unsigned char subcode[147]; /* AES/IEC958 subcode bits */ unsigned char pad; /* nothing */ unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */ @@ -202,6 +204,11 @@ typedef int __bitwise snd_pcm_format_t; #define SNDRV_PCM_FORMAT_S24_BE ((__force snd_pcm_format_t) 7) /* low three bytes */ #define SNDRV_PCM_FORMAT_U24_LE ((__force snd_pcm_format_t) 8) /* low three bytes */ #define SNDRV_PCM_FORMAT_U24_BE ((__force snd_pcm_format_t) 9) /* low three bytes */ +/* + * For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the + * available bit count in most significant bit. It's for the case of so-called 'left-justified' or + * `right-padded` sample which has less width than 32 bit. + */ #define SNDRV_PCM_FORMAT_S32_LE ((__force snd_pcm_format_t) 10) #define SNDRV_PCM_FORMAT_S32_BE ((__force snd_pcm_format_t) 11) #define SNDRV_PCM_FORMAT_U32_LE ((__force snd_pcm_format_t) 12) @@ -300,7 +307,7 @@ typedef int __bitwise snd_pcm_subformat_t; #define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */ #define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */ #define SNDRV_PCM_INFO_EXPLICIT_SYNC 0x10000000 /* needs explicit sync of pointers and data */ - +#define SNDRV_PCM_INFO_NO_REWINDS 0x20000000 /* hardware can only support monotonic changes of appl_ptr */ #define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */ #define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */ diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c index 109aa7ffcf9731cf7e241e1f6fa2b673ae9418c4..ba4b8d94e004823219594a09347be0d416823db0 100644 --- a/tools/lib/find_bit.c +++ b/tools/lib/find_bit.c @@ -96,6 +96,26 @@ unsigned long _find_first_bit(const unsigned long *addr, unsigned long size) } #endif +#ifndef find_first_and_bit +/* + * Find the first set bit in two memory regions. + */ +unsigned long _find_first_and_bit(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size) +{ + unsigned long idx, val; + + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + val = addr1[idx] & addr2[idx]; + if (val) + return min(idx * BITS_PER_LONG + __ffs(val), size); + } + + return size; +} +#endif + #ifndef find_first_zero_bit /* * Find the first cleared bit in a memory region.
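The _find_first_and_bit() helper added to tools/lib/find_bit.c above returns the index of the first bit set in both bitmaps, scanning word by word so the intersection is never materialized, and returns size when the bitmaps share no set bit. A minimal standalone sketch of the same idea follows; BITS_PER_LONG, my_ffs() and the inlined min() are portable stand-ins, not the kernel's helpers:

```c
#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

/* Stand-in for the kernel's __ffs(); GCC/Clang builtin, val must be non-zero. */
static unsigned long my_ffs(unsigned long val)
{
	return (unsigned long)__builtin_ctzl(val);
}

/* First bit set in (addr1 AND addr2), or size if there is none. */
static unsigned long find_first_and_bit(const unsigned long *addr1,
					const unsigned long *addr2,
					unsigned long size)
{
	unsigned long idx, val;

	for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
		val = addr1[idx] & addr2[idx];
		if (val) {
			unsigned long bit = idx * BITS_PER_LONG + my_ffs(val);

			return bit < size ? bit : size; /* min(bit, size) */
		}
	}
	return size;
}

int main(void)
{
	unsigned long a[] = { 0xf0UL, 0x01UL };
	unsigned long b[] = { 0x0fUL, 0x01UL };

	/* Word 0 shares no bits; word 1 shares bit 0, i.e. global bit 64
	 * on a system with 64-bit long. */
	printf("%lu\n", find_first_and_bit(a, b, 128));
	return 0;
}
```

Returning early from the first word with a common bit keeps the scan allocation-free and touches only as many words as needed, which is why the helper avoids computing the full AND up front.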
diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 7ea86a44eae5a9518808d607444129b7389ae869..210ea7c06ce85074853a6951ab2c53ff8d56c005 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -141,7 +141,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, } if (evsel->fd == NULL && - perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0) + perf_evsel__alloc_fd(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0) return -ENOMEM; perf_cpu_map__for_each_cpu(cpu, idx, cpus) { @@ -384,7 +384,7 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter) { int err = 0, i; - for (i = 0; i < evsel->cpus->nr && !err; i++) + for (i = 0; i < perf_cpu_map__nr(evsel->cpus) && !err; i++) err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_SET_FILTER, (void *)filter, i); diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index f7ee07cb581885d123a864a6df9b61536d056f2d..0d1634cedf44e145562315db077868f533823fd3 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -13,6 +13,7 @@ #include <internal/lib.h> #include <linux/kernel.h> #include <linux/math64.h> +#include <linux/stringify.h> #include "internal.h" void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev, @@ -294,6 +295,103 @@ static u64 read_timestamp(void) return low | ((u64)high) << 32; } +#elif defined(__aarch64__) +#define read_sysreg(r) ({ \ + u64 __val; \ + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +static u64 read_pmccntr(void) +{ + return read_sysreg(pmccntr_el0); +} + +#define PMEVCNTR_READ(idx) \ + static u64 read_pmevcntr_##idx(void) { \ + return read_sysreg(pmevcntr##idx##_el0); \ + } + +PMEVCNTR_READ(0); +PMEVCNTR_READ(1); +PMEVCNTR_READ(2); +PMEVCNTR_READ(3); +PMEVCNTR_READ(4); +PMEVCNTR_READ(5); +PMEVCNTR_READ(6); +PMEVCNTR_READ(7); +PMEVCNTR_READ(8); +PMEVCNTR_READ(9); +PMEVCNTR_READ(10); +PMEVCNTR_READ(11); +PMEVCNTR_READ(12); +PMEVCNTR_READ(13); +PMEVCNTR_READ(14); +PMEVCNTR_READ(15); +PMEVCNTR_READ(16); +PMEVCNTR_READ(17); +PMEVCNTR_READ(18); +PMEVCNTR_READ(19); +PMEVCNTR_READ(20); +PMEVCNTR_READ(21); +PMEVCNTR_READ(22); +PMEVCNTR_READ(23); +PMEVCNTR_READ(24); +PMEVCNTR_READ(25); +PMEVCNTR_READ(26); +PMEVCNTR_READ(27); +PMEVCNTR_READ(28); +PMEVCNTR_READ(29); +PMEVCNTR_READ(30); + +/* + * Read a value direct from PMEVCNTR + */ +static u64 read_perf_counter(unsigned int counter) +{ + static u64 (* const read_f[])(void) = { + read_pmevcntr_0, + read_pmevcntr_1, + read_pmevcntr_2, + read_pmevcntr_3, + read_pmevcntr_4, + read_pmevcntr_5, + read_pmevcntr_6, + read_pmevcntr_7, + read_pmevcntr_8, + read_pmevcntr_9, + read_pmevcntr_10, + read_pmevcntr_11, + read_pmevcntr_12, + read_pmevcntr_13, + read_pmevcntr_14, + read_pmevcntr_15, + read_pmevcntr_16, + read_pmevcntr_17, + read_pmevcntr_18, + read_pmevcntr_19, + read_pmevcntr_20, + read_pmevcntr_21, + read_pmevcntr_22, + read_pmevcntr_23, + read_pmevcntr_24, + read_pmevcntr_25, + read_pmevcntr_26, + read_pmevcntr_27, + read_pmevcntr_28, + read_pmevcntr_29, + read_pmevcntr_30, + read_pmccntr + }; + + if (counter < ARRAY_SIZE(read_f)) + return (read_f[counter])(); + + return 0; +} + +static u64 read_timestamp(void) { return read_sysreg(cntvct_el0); } + #else static u64 read_perf_counter(unsigned int counter __maybe_unused) { return 0; } static u64 read_timestamp(void) { return 0; } diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index 33ae9334861a242ab59a68979d32ca3b7ea8aa68..89be89afb24d93d0440a6a8688fa68992673578f 100644 --- a/tools/lib/perf/tests/test-evsel.c +++
b/tools/lib/perf/tests/test-evsel.c @@ -130,6 +130,9 @@ static int test_stat_user_read(int event) struct perf_event_attr attr = { .type = PERF_TYPE_HARDWARE, .config = event, +#ifdef __aarch64__ + .config1 = 0x2, /* Request user access */ +#endif }; int err, i; @@ -150,7 +153,7 @@ static int test_stat_user_read(int event) pc = perf_evsel__mmap_base(evsel, 0, 0); __T("failed to get mmapped address", pc); -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) __T("userspace counter access not supported", pc->cap_user_rdpmc); __T("userspace counter access not enabled", pc->index); __T("userspace counter width not set", pc->pmc_width >= 32); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c2d2ab9a28616ea65274dc68d9cedb42c65ab2df..7c33ec67c4a9534497f407d43c83998a98a7eb84 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2854,7 +2854,7 @@ static inline bool func_uaccess_safe(struct symbol *func) static inline const char *call_dest_name(struct instruction *insn) { - static char pvname[16]; + static char pvname[19]; struct reloc *rel; int idx; diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index e2c481fcede6bd11a5bbd4b43c02b780be0de10b..3f1886ad9d8060b48522e5af59aeac2b61fbfd34 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -364,3 +364,4 @@ # 447 reserved for memfd_secret 448 n64 process_mrelease sys_process_mrelease 449 n64 futex_waitv sys_futex_waitv +450 common set_mempolicy_home_node sys_set_mempolicy_home_node diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 15109af9d0754d5fcb6e455532944ef1a0a14e17..2600b4237292c78c44363159a02eaca9a2dd88c8 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -529,3 +529,4 @@ # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv +450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index ed9c5c2eafad700ce45ad0178837ed3d1c9204d1..799147658dee20dde3993818f977579429898e4d 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -452,3 +452,4 @@ # 447 reserved for memfd_secret 448 common process_mrelease sys_process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv sys_futex_waitv +450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index fe8f8dd157b4d49c7daf6abc5441e1039603f126..c84d12608cd2de9e45f6cc166169cf70ecbf7518 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -371,6 +371,7 @@ 447 common memfd_secret sys_memfd_secret 448 common process_mrelease sys_process_mrelease 449 common futex_waitv sys_futex_waitv +450 common set_mempolicy_home_node sys_set_mempolicy_home_node # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index 
1a17ec83d3c46963c03b4abe62c81561f357bf13..740ae764537e8c886ce91897056bde7d2602bada 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -333,7 +333,7 @@ int bench_epoll_ctl(int argc, const char **argv) /* default to the number of CPUs */ if (!nthreads) - nthreads = cpu->nr; + nthreads = perf_cpu_map__nr(cpu); worker = calloc(nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 0d1dd887919732906b0bd070d6de6a8023dae77f..37de970c97437ccb983c2154a250d3891c867546 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -452,7 +452,7 @@ int bench_epoll_wait(int argc, const char **argv) /* default to the number of CPUs and leave one for the writer pthread */ if (!nthreads) - nthreads = cpu->nr - 1; + nthreads = perf_cpu_map__nr(cpu) - 1; worker = calloc(nthreads, sizeof(*worker)); if (!worker) { diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c index 482738e9bdadfb25718db999f838c47fd28717ba..de56601f69ee6c32bb2542dfda9d626080c97ee2 100644 --- a/tools/perf/bench/evlist-open-close.c +++ b/tools/perf/bench/evlist-open-close.c @@ -71,7 +71,7 @@ static int evlist__count_evsel_fds(struct evlist *evlist) int cnt = 0; evlist__for_each_entry(evlist, evsel) - cnt += evsel->core.threads->nr * evsel->core.cpus->nr; + cnt += evsel->core.threads->nr * perf_cpu_map__nr(evsel->core.cpus); return cnt; } @@ -151,7 +151,7 @@ static int bench_evlist_open_close__run(char *evstr) init_stats(&time_stats); - printf(" Number of cpus:\t%d\n", evlist->core.cpus->nr); + printf(" Number of cpus:\t%d\n", perf_cpu_map__nr(evlist->core.cpus)); printf(" Number of threads:\t%d\n", evlist->core.threads->nr); printf(" Number of events:\t%d (%d fds)\n", evlist->core.nr_entries, evlist__count_evsel_fds(evlist)); diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 9627b6ab8670449668601cf5988a3c0edc1cf9a0..dbcecec4eedacec29b9f2c88654c7a0d8395fd51 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -150,7 +150,7 @@ int bench_futex_hash(int argc, const char **argv) } if (!params.nthreads) /* default to the number of CPUs */ - params.nthreads = cpu->nr; + params.nthreads = perf_cpu_map__nr(cpu); worker = calloc(params.nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index a512a320df74b3093e139d5cf9dd3de9b9424994..6fc9a3d55c1f768da88b33f2a88e23f4bfab84d9 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -173,7 +173,7 @@ int bench_futex_lock_pi(int argc, const char **argv) } if (!params.nthreads) - params.nthreads = cpu->nr; + params.nthreads = perf_cpu_map__nr(cpu); worker = calloc(params.nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index aca47ce8b1e7ebdaf1008c8e0d015caa9f1030d0..2f59d5d1c50968cf906de919e303be1c472aa370 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -175,7 +175,7 @@ int bench_futex_requeue(int argc, const char **argv) } if (!params.nthreads) - params.nthreads = cpu->nr; + params.nthreads = perf_cpu_map__nr(cpu); worker = calloc(params.nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 888ee60379458dcb10c06a8ca19abab704e6b908..861deb934745d279238b2a91c646bbb315274be8 100644 --- 
a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -252,7 +252,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) err(EXIT_FAILURE, "calloc"); if (!params.nthreads) - params.nthreads = cpu->nr; + params.nthreads = perf_cpu_map__nr(cpu); /* some sanity checks */ if (params.nwakes > params.nthreads || diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index aa82db51c0abb06d8f10c0da9580aa19d642f447..cfda48bef1d72d954a81a71b44bb18a4c9f51efc 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -151,7 +151,7 @@ int bench_futex_wake(int argc, const char **argv) } if (!params.nthreads) - params.nthreads = cpu->nr; + params.nthreads = perf_cpu_map__nr(cpu); worker = calloc(params.nthreads, sizeof(*worker)); if (!worker) diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 71452599f87d8d0b0efab1231affa074e23a32ca..a8785dec5ca6dc6e8e05bfeb6b2267fb1f105ea6 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -281,7 +281,7 @@ static int set_tracing_cpumask(struct perf_cpu_map *cpumap) int ret; int last_cpu; - last_cpu = perf_cpu_map__cpu(cpumap, cpumap->nr - 1).cpu; + last_cpu = perf_cpu_map__cpu(cpumap, perf_cpu_map__nr(cpumap) - 1).cpu; mask_size = last_cpu / 4 + 2; /* one more byte for EOS */ mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */ @@ -1115,6 +1115,7 @@ enum perf_ftrace_subcommand { int cmd_ftrace(int argc, const char **argv) { int ret; + int (*cmd_func)(struct perf_ftrace *) = NULL; struct perf_ftrace ftrace = { .tracer = DEFAULT_TRACER, .target = { .uid = UINT_MAX, }, @@ -1221,6 +1222,28 @@ int cmd_ftrace(int argc, const char **argv) goto out_delete_filters; } + switch (subcmd) { + case PERF_FTRACE_TRACE: + if (!argc && target__none(&ftrace.target)) + ftrace.target.system_wide = true; + cmd_func = __cmd_ftrace; + break; + case PERF_FTRACE_LATENCY: + if (list_empty(&ftrace.filters)) { + pr_err("Should provide a function to measure\n"); + parse_options_usage(ftrace_usage, options, "T", 1); + ret = -EINVAL; + goto out_delete_filters; + } + cmd_func = __cmd_latency; + break; + case PERF_FTRACE_NONE: + default: + pr_err("Invalid subcommand\n"); + ret = -EINVAL; + goto out_delete_filters; + } + ret = target__validate(&ftrace.target); if (ret) { char errbuf[512]; @@ -1248,27 +1271,7 @@ int cmd_ftrace(int argc, const char **argv) goto out_delete_evlist; } - switch (subcmd) { - case PERF_FTRACE_TRACE: - if (!argc && target__none(&ftrace.target)) - ftrace.target.system_wide = true; - ret = __cmd_ftrace(&ftrace); - break; - case PERF_FTRACE_LATENCY: - if (list_empty(&ftrace.filters)) { - pr_err("Should provide a function to measure\n"); - parse_options_usage(ftrace_usage, options, "T", 1); - ret = -EINVAL; - goto out_delete_evlist; - } - ret = __cmd_latency(&ftrace); - break; - case PERF_FTRACE_NONE: - default: - pr_err("Invalid subcommand\n"); - ret = -EINVAL; - break; - } + ret = cmd_func(&ftrace); out_delete_evlist: evlist__delete(ftrace.evlist); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 409b721666cba393adfd398969783d51f834f2f4..fbf43a454cbad2ccd37ab3cc4f1718de6d40c05d 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -535,12 +535,9 @@ static int perf_event__repipe_exit(struct perf_tool *tool, static int perf_event__repipe_tracing_data(struct perf_session *session, union perf_event *event) { - int err; - perf_event__repipe_synth(session->tool, event); - err = 
perf_event__process_tracing_data(session, event); - return err; + return perf_event__process_tracing_data(session, event); } static int dso__read_build_id(struct dso *dso) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ecd4f99a6c14f4be059a842d8c5088bef797394a..abae8184e171ed44d36eb198e61fbe8378ecfe92 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -515,7 +515,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) return -EINVAL; if (PRINT_FIELD(PHYS_ADDR) && - evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR)) + evsel__do_check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR, allow_user_set)) return -EINVAL; if (PRINT_FIELD(DATA_PAGE_SIZE) && diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 973ade18b72a947900405d676c92b11135946525..3f98689dd68782ac10d4a11d3dc157caed8bc355 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -230,11 +230,12 @@ static bool cpus_map_matched(struct evsel *a, struct evsel *b) if (!a->core.cpus || !b->core.cpus) return false; - if (a->core.cpus->nr != b->core.cpus->nr) + if (perf_cpu_map__nr(a->core.cpus) != perf_cpu_map__nr(b->core.cpus)) return false; - for (int i = 0; i < a->core.cpus->nr; i++) { - if (a->core.cpus->map[i].cpu != b->core.cpus->map[i].cpu) + for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) { + if (perf_cpu_map__cpu(a->core.cpus, i).cpu != + perf_cpu_map__cpu(b->core.cpus, i).cpu) return false; } @@ -788,7 +789,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) const bool forks = (argc > 0); bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; struct evlist_cpu_iterator evlist_cpu_itr; - struct affinity affinity; + struct affinity saved_affinity, *affinity = NULL; int err; bool second_pass = false; @@ -803,8 +804,11 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) if (group) evlist__set_leader(evsel_list); - if (affinity__setup(&affinity) < 0) - return -1; + if (!cpu_map__is_dummy(evsel_list->core.cpus)) { + if (affinity__setup(&saved_affinity) < 0) + return -1; + affinity = &saved_affinity; + } evlist__for_each_entry(evsel_list, counter) { if (bpf_counter__load(counter, &target)) @@ -813,7 +817,7 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) all_counters_use_bpf = false; } - evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) { + evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { counter = evlist_cpu_itr.evsel; /* @@ -869,7 +873,7 @@ try_again: */ /* First close errored or weak retry */ - evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) { + evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { counter = evlist_cpu_itr.evsel; if (!counter->reset_group && !counter->errored) @@ -878,7 +882,7 @@ try_again: perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx); } /* Now reopen weak */ - evlist__for_each_cpu(evlist_cpu_itr, evsel_list, &affinity) { + evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) { counter = evlist_cpu_itr.evsel; if (!counter->reset_group && !counter->errored) @@ -904,7 +908,7 @@ try_again_reset: counter->supported = true; } } - affinity__cleanup(&affinity); + affinity__cleanup(affinity); evlist__for_each_entry(evsel_list, counter) { if (!counter->supported) { diff --git a/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json index 
73089c682f8032ae31ede885ca2bf1c4656ed256..41bac1c6a00800955ca280cfc714a090437d2035 100644 --- a/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json +++ b/tools/perf/pmu-events/arch/test/test_soc/cpu/uncore.json @@ -18,6 +18,22 @@ "Invert": "0", "EdgeDetect": "0" }, + { + "Unit": "CBO", + "EventCode": "0xE0", + "UMask": "0x00", + "EventName": "event-hyphen", + "BriefDescription": "UNC_CBO_HYPHEN", + "PublicDescription": "UNC_CBO_HYPHEN" + }, + { + "Unit": "CBO", + "EventCode": "0xC0", + "UMask": "0x00", + "EventName": "event-two-hyph", + "BriefDescription": "UNC_CBO_TWO_HYPH", + "PublicDescription": "UNC_CBO_TWO_HYPH" + }, { "EventCode": "0x7", "EventName": "uncore_hisi_l3c.rd_hit_cpipe", diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c index 0bf399c49849dea0f1f087a2279535dbbf5fed4e..4965dd6669566125d38ca3ef61714994aa1addb4 100644 --- a/tools/perf/tests/bitmap.c +++ b/tools/perf/tests/bitmap.c @@ -17,8 +17,8 @@ static unsigned long *get_bitmap(const char *str, int nbits) bm = bitmap_zalloc(nbits); if (map && bm) { - for (i = 0; i < map->nr; i++) - set_bit(map->map[i].cpu, bm); + for (i = 0; i < perf_cpu_map__nr(map); i++) + set_bit(perf_cpu_map__cpu(map, i).cpu, bm); } if (map) diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c index 16b6d6f47f38bd17c353885d53ed1c77de7e6822..78db4d704e76c1327817f80dc852abe2867ecf7e 100644 --- a/tools/perf/tests/event_update.c +++ b/tools/perf/tests/event_update.c @@ -75,10 +75,10 @@ static int process_event_cpus(struct perf_tool *tool __maybe_unused, TEST_ASSERT_VAL("wrong id", ev->id == 123); TEST_ASSERT_VAL("wrong type", ev->type == PERF_EVENT_UPDATE__CPUS); - TEST_ASSERT_VAL("wrong cpus", map->nr == 3); - TEST_ASSERT_VAL("wrong cpus", map->map[0].cpu == 1); - TEST_ASSERT_VAL("wrong cpus", map->map[1].cpu == 2); - TEST_ASSERT_VAL("wrong cpus", map->map[2].cpu == 3); + TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__nr(map) == 3); + TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 0).cpu == 1); + TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 1).cpu == 2); + TEST_ASSERT_VAL("wrong cpus", perf_cpu_map__cpu(map, 2).cpu == 3); perf_cpu_map__put(map); return 0; } diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c index f4a4aba33f76539c2d0f3a384d4294f8d7913657..4c96829510c916c89c64c4ee97e87d02d52035a5 100644 --- a/tools/perf/tests/mem2node.c +++ b/tools/perf/tests/mem2node.c @@ -25,14 +25,15 @@ static unsigned long *get_bitmap(const char *str, int nbits) { struct perf_cpu_map *map = perf_cpu_map__new(str); unsigned long *bm = NULL; - int i; bm = bitmap_zalloc(nbits); if (map && bm) { - for (i = 0; i < map->nr; i++) { - set_bit(map->map[i].cpu, bm); - } + struct perf_cpu cpu; + int i; + + perf_cpu_map__for_each_cpu(cpu, i, map) + set_bit(cpu.cpu, bm); } if (map) diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 0ad62914b4d7a6456683ef52daf4482fd9359a57..c3c17600f29c9644d3b082db0ed4fbf7fc955bbd 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -59,11 +59,12 @@ static int test__basic_mmap(struct test_suite *test __maybe_unused, int subtest } CPU_ZERO(&cpu_set); - CPU_SET(cpus->map[0].cpu, &cpu_set); + CPU_SET(perf_cpu_map__cpu(cpus, 0).cpu, &cpu_set); sched_setaffinity(0, sizeof(cpu_set), &cpu_set); if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) { pr_debug("sched_setaffinity() failed on CPU %d: %s ", - cpus->map[0].cpu, str_error_r(errno, sbuf, sizeof(sbuf))); + perf_cpu_map__cpu(cpus, 0).cpu, + str_error_r(errno, 
sbuf, sizeof(sbuf))); goto out_free_cpus; } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index a508f1dbcb2ad4f19c5b6ab99c955c4b771e3584..e71efadb24f5e8e151dc2946b3555508f4e03e60 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -2069,6 +2069,31 @@ static int test_event(struct evlist_test *e) return ret; } +static int test_event_fake_pmu(const char *str) +{ + struct parse_events_error err; + struct evlist *evlist; + int ret; + + evlist = evlist__new(); + if (!evlist) + return -ENOMEM; + + parse_events_error__init(&err); + perf_pmu__test_parse_init(); + ret = __parse_events(evlist, str, &err, &perf_pmu__fake); + if (ret) { + pr_debug("failed to parse event '%s', err %d, str '%s'\n", + str, ret, err.str); + parse_events_error__print(&err, str); + } + + parse_events_error__exit(&err); + evlist__delete(evlist); + + return ret; +} + static int test_events(struct evlist_test *events, unsigned cnt) { int ret1, ret2 = 0; @@ -2276,6 +2301,26 @@ static int test_pmu_events_alias(char *event, char *alias) return test_event(&e); } +static int test_pmu_events_alias2(void) +{ + static const char events[][30] = { + "event-hyphen", + "event-two-hyph", + }; + unsigned long i; + int ret = 0; + + for (i = 0; i < ARRAY_SIZE(events); i++) { + ret = test_event_fake_pmu(&events[i][0]); + if (ret) { + pr_err("check_parse_fake %s failed\n", &events[i][0]); + break; + } + } + + return ret; +} + static int test__parse_events(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { int ret1, ret2 = 0; @@ -2313,6 +2358,10 @@ do { \ return ret; } + ret1 = test_pmu_events_alias2(); + if (!ret2) + ret2 = ret1; + ret1 = test_terms(test__terms, ARRAY_SIZE(test__terms)); if (!ret2) ret2 = ret1; diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index df1c9a3cc05bf31cfaae253af62f0aa47e6dbc8b..1c695fb5a79c63ff33ee27f1b8dde0a932478294 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -143,6 +143,34 @@ static const struct perf_pmu_test_event unc_cbo_xsnp_response_miss_eviction = { .matching_pmu = "uncore_cbox_0", }; +static const struct perf_pmu_test_event uncore_hyphen = { + .event = { + .name = "event-hyphen", + .event = "umask=0x00,event=0xe0", + .desc = "Unit: uncore_cbox UNC_CBO_HYPHEN", + .topic = "uncore", + .long_desc = "UNC_CBO_HYPHEN", + .pmu = "uncore_cbox", + }, + .alias_str = "umask=0,event=0xe0", + .alias_long_desc = "UNC_CBO_HYPHEN", + .matching_pmu = "uncore_cbox_0", +}; + +static const struct perf_pmu_test_event uncore_two_hyph = { + .event = { + .name = "event-two-hyph", + .event = "umask=0x00,event=0xc0", + .desc = "Unit: uncore_cbox UNC_CBO_TWO_HYPH", + .topic = "uncore", + .long_desc = "UNC_CBO_TWO_HYPH", + .pmu = "uncore_cbox", + }, + .alias_str = "umask=0,event=0xc0", + .alias_long_desc = "UNC_CBO_TWO_HYPH", + .matching_pmu = "uncore_cbox_0", +}; + static const struct perf_pmu_test_event uncore_hisi_l3c_rd_hit_cpipe = { .event = { .name = "uncore_hisi_l3c.rd_hit_cpipe", @@ -188,6 +216,8 @@ static const struct perf_pmu_test_event uncore_imc_cache_hits = { static const struct perf_pmu_test_event *uncore_events[] = { &uncore_hisi_ddrc_flux_wcmd, &unc_cbo_xsnp_response_miss_eviction, + &uncore_hyphen, + &uncore_two_hyph, &uncore_hisi_l3c_rd_hit_cpipe, &uncore_imc_free_running_cache_miss, &uncore_imc_cache_hits, @@ -654,6 +684,8 @@ static struct perf_pmu_test_pmu test_pmus[] = { }, .aliases = { &unc_cbo_xsnp_response_miss_eviction, + &uncore_hyphen, + &uncore_two_hyph, }, }, 
{ diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c index c4ef0c7002f1b6ec3feb199f5ff2dfa908c38e21..ee1e3dcbc0bdb185f0ce98bf20210eddee58f339 100644 --- a/tools/perf/tests/topology.c +++ b/tools/perf/tests/topology.c @@ -122,44 +122,48 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) } // Test that CPU ID contains socket, die, core and CPU - for (i = 0; i < map->nr; i++) { + for (i = 0; i < perf_cpu_map__nr(map); i++) { id = aggr_cpu_id__cpu(perf_cpu_map__cpu(map, i), NULL); - TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match", map->map[i].cpu == id.cpu.cpu); + TEST_ASSERT_VAL("Cpu map - CPU ID doesn't match", + perf_cpu_map__cpu(map, i).cpu == id.cpu.cpu); TEST_ASSERT_VAL("Cpu map - Core ID doesn't match", - session->header.env.cpu[map->map[i].cpu].core_id == id.core); + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core); TEST_ASSERT_VAL("Cpu map - Socket ID doesn't match", - session->header.env.cpu[map->map[i].cpu].socket_id == id.socket); + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id == + id.socket); TEST_ASSERT_VAL("Cpu map - Die ID doesn't match", - session->header.env.cpu[map->map[i].cpu].die_id == id.die); + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die); TEST_ASSERT_VAL("Cpu map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Cpu map - Thread is set", id.thread == -1); } // Test that core ID contains socket, die and core - for (i = 0; i < map->nr; i++) { + for (i = 0; i < perf_cpu_map__nr(map); i++) { id = aggr_cpu_id__core(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Core map - Core ID doesn't match", - session->header.env.cpu[map->map[i].cpu].core_id == id.core); + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].core_id == id.core); TEST_ASSERT_VAL("Core map - Socket ID doesn't match", - session->header.env.cpu[map->map[i].cpu].socket_id == id.socket); + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id == + id.socket); TEST_ASSERT_VAL("Core map - Die ID doesn't match", - session->header.env.cpu[map->map[i].cpu].die_id == id.die); + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die); TEST_ASSERT_VAL("Core map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Core map - Thread is set", id.thread == -1); } // Test that die ID contains socket and die - for (i = 0; i < map->nr; i++) { + for (i = 0; i < perf_cpu_map__nr(map); i++) { id = aggr_cpu_id__die(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Die map - Socket ID doesn't match", - session->header.env.cpu[map->map[i].cpu].socket_id == id.socket); + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id == + id.socket); TEST_ASSERT_VAL("Die map - Die ID doesn't match", - session->header.env.cpu[map->map[i].cpu].die_id == id.die); + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].die_id == id.die); TEST_ASSERT_VAL("Die map - Node ID is set", id.node == -1); TEST_ASSERT_VAL("Die map - Core is set", id.core == -1); @@ -168,10 +172,11 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) } // Test that socket ID contains only socket - for (i = 0; i < map->nr; i++) { + for (i = 0; i < perf_cpu_map__nr(map); i++) { id = aggr_cpu_id__socket(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Socket map - Socket ID doesn't match", - session->header.env.cpu[map->map[i].cpu].socket_id == id.socket); + session->header.env.cpu[perf_cpu_map__cpu(map, i).cpu].socket_id == + id.socket); TEST_ASSERT_VAL("Socket map - Node ID is set", id.node == 
-1); TEST_ASSERT_VAL("Socket map - Die ID is set", id.die == -1); @@ -181,10 +186,10 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map) } // Test that node ID contains only node - for (i = 0; i < map->nr; i++) { + for (i = 0; i < perf_cpu_map__nr(map); i++) { id = aggr_cpu_id__node(perf_cpu_map__cpu(map, i), NULL); TEST_ASSERT_VAL("Node map - Node ID doesn't match", - cpu__get_node(map->map[i]) == id.node); + cpu__get_node(perf_cpu_map__cpu(map, i)) == id.node); TEST_ASSERT_VAL("Node map - Socket is set", id.socket == -1); TEST_ASSERT_VAL("Node map - Die ID is set", id.die == -1); TEST_ASSERT_VAL("Node map - Core is set", id.core == -1); diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh index 3109d7b05e113bb5ebe16738d8dc441bebf4533e..3d278785fe574a0e55043a6682c791b73c404e41 100755 --- a/tools/perf/trace/beauty/prctl_option.sh +++ b/tools/perf/trace/beauty/prctl_option.sh @@ -4,7 +4,7 @@ [ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/ printf "static const char *prctl_options[] = {\n" -regex='^#define[[:space:]]+PR_(\w+)[[:space:]]*([[:xdigit:]]+).*' +regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*\/.*)?$' egrep $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \ sed -r "s/$regex/\2 \1/g" | \ sort -n | xargs printf "\t[%s] = \"%s\",\n" diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c index f1e30d566db3c83553125182b2afe231bb9cb1ed..4d216c0dc4259b9ef812b41a793f911f74e44e9f 100644 --- a/tools/perf/util/affinity.c +++ b/tools/perf/util/affinity.c @@ -62,7 +62,7 @@ void affinity__set(struct affinity *a, int cpu) clear_bit(cpu, a->sched_cpus); } -void affinity__cleanup(struct affinity *a) +static void __affinity__cleanup(struct affinity *a) { int cpu_set_size = get_cpu_set_size(); @@ -71,3 +71,9 @@ void affinity__cleanup(struct affinity *a) zfree(&a->sched_cpus); zfree(&a->orig_cpus); } + +void affinity__cleanup(struct affinity *a) +{ + if (a != NULL) + __affinity__cleanup(a); +} diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 01900689dc0017a422bfaa443856b226c72d8a1e..8190a124b99d9d3ff0cb155330c60693bf2208ad 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2036,6 +2036,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) memset(&objdump_process, 0, sizeof(objdump_process)); objdump_process.argv = objdump_argv; objdump_process.out = -1; + objdump_process.err = -1; if (start_command(&objdump_process)) { pr_err("Failure starting to run %s\n", command); err = -1; diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 5632efc44738c760017ef950c33fc171336ac156..825336304a375995714c351c6c3fe0fa5c09c6e4 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -174,7 +174,7 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, mp->idx = idx; if (per_cpu) { - mp->cpu = evlist->core.cpus->map[idx]; + mp->cpu = perf_cpu_map__cpu(evlist->core.cpus, idx); if (evlist->core.threads) mp->tid = perf_thread_map__pid(evlist->core.threads, 0); else diff --git a/tools/perf/util/bpf_counter_cgroup.c b/tools/perf/util/bpf_counter_cgroup.c index 631e34a0b66ff084dc01fc01413ba35f38e4e6cc..ac60c08e8e2a5ba4653c554f8b6b23200fb8d39d 100644 --- a/tools/perf/util/bpf_counter_cgroup.c +++ b/tools/perf/util/bpf_counter_cgroup.c @@ -266,7 +266,7 @@ static int bperf_cgrp__read(struct evsel *evsel) idx = evsel->core.idx; err = 
bpf_map_lookup_elem(reading_map_fd, &idx, values); if (err) { - pr_err("bpf map lookup falied: idx=%u, event=%s, cgrp=%s\n", + pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n", idx, evsel__name(evsel), evsel->cgrp->name); goto out; } diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c index 2b81707b9dbaef155eb3b14dde3f262376685bf4..7a447d9184588876429cefb59cfa181fe3e9b155 100644 --- a/tools/perf/util/counts.c +++ b/tools/perf/util/counts.c @@ -61,7 +61,7 @@ int evsel__alloc_counts(struct evsel *evsel) struct perf_cpu_map *cpus = evsel__cpus(evsel); int nthreads = perf_thread_map__nr(evsel->core.threads); - evsel->counts = perf_counts__new(cpus ? cpus->nr : 1, nthreads); + evsel->counts = perf_counts__new(perf_cpu_map__nr(cpus), nthreads); return evsel->counts != NULL ? 0 : -ENOMEM; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 0d3c2006a15d99190724c7fe21f41e05bf4a8862..703ae6d3386e3934baf489679242ba9a09086ce9 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -4,7 +4,6 @@ #include #include -#include #include #include @@ -57,7 +56,7 @@ struct perf_cpu cpu__max_present_cpu(void); */ static inline bool cpu_map__is_dummy(struct perf_cpu_map *cpus) { - return cpus->nr == 1 && cpus->map[0].cpu == -1; + return perf_cpu_map__nr(cpus) == 1 && perf_cpu_map__cpu(cpus, 0).cpu == -1; } /** diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c index e20b835a11943aebe2c20e7f31c1c0f243a03a73..d275d843c1550dd27e25e3a6af0e219b929f2849 100644 --- a/tools/perf/util/cputopo.c +++ b/tools/perf/util/cputopo.c @@ -325,7 +325,7 @@ struct numa_topology *numa_topology__new(void) if (!node_map) goto out; - nr = (u32) node_map->nr; + nr = (u32) perf_cpu_map__nr(node_map); tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0])*nr); if (!tp) @@ -334,7 +334,7 @@ struct numa_topology *numa_topology__new(void) tp->nr = nr; for (i = 0; i < nr; i++) { - if (load_numa_node(&tp->nodes[i], node_map->map[i].cpu)) { + if (load_numa_node(&tp->nodes[i], perf_cpu_map__cpu(node_map, i).cpu)) { numa_topology__delete(tp); tp = NULL; break; diff --git a/tools/perf/util/evlist-hybrid.c b/tools/perf/util/evlist-hybrid.c index 7c554234b43d4fff3272cc7f4a2045446df0a620..7f234215147d4bebbc3d6eeb7898f12edb288422 100644 --- a/tools/perf/util/evlist-hybrid.c +++ b/tools/perf/util/evlist-hybrid.c @@ -124,22 +124,23 @@ int evlist__fix_hybrid_cpus(struct evlist *evlist, const char *cpu_list) events_nr++; - if (matched_cpus->nr > 0 && (unmatched_cpus->nr > 0 || - matched_cpus->nr < cpus->nr || - matched_cpus->nr < pmu->cpus->nr)) { + if (perf_cpu_map__nr(matched_cpus) > 0 && + (perf_cpu_map__nr(unmatched_cpus) > 0 || + perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(cpus) || + perf_cpu_map__nr(matched_cpus) < perf_cpu_map__nr(pmu->cpus))) { perf_cpu_map__put(evsel->core.cpus); perf_cpu_map__put(evsel->core.own_cpus); evsel->core.cpus = perf_cpu_map__get(matched_cpus); evsel->core.own_cpus = perf_cpu_map__get(matched_cpus); - if (unmatched_cpus->nr > 0) { + if (perf_cpu_map__nr(unmatched_cpus) > 0) { cpu_map__snprint(matched_cpus, buf1, sizeof(buf1)); pr_warning("WARNING: use %s in '%s' for '%s', skip other cpus in list.\n", buf1, pmu->name, evsel->name); } } - if (matched_cpus->nr == 0) { + if (perf_cpu_map__nr(matched_cpus) == 0) { evlist__remove(evlist, evsel); evsel__delete(evsel); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6e88d404b5b3e96fece7d56b1efd0a90dd166c1e..eaad04e1672a47523b13114e828f4961eecec745 100644 --- a/tools/perf/util/evlist.c 
+++ b/tools/perf/util/evlist.c @@ -430,15 +430,19 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name) { struct evsel *pos; struct evlist_cpu_iterator evlist_cpu_itr; - struct affinity affinity; + struct affinity saved_affinity, *affinity = NULL; bool has_imm = false; - if (affinity__setup(&affinity) < 0) - return; + // See explanation in evlist__close() + if (!cpu_map__is_dummy(evlist->core.cpus)) { + if (affinity__setup(&saved_affinity) < 0) + return; + affinity = &saved_affinity; + } /* Disable 'immediate' events last */ for (int imm = 0; imm <= 1; imm++) { - evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) { + evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) { pos = evlist_cpu_itr.evsel; if (evsel__strcmp(pos, evsel_name)) continue; @@ -454,7 +458,7 @@ static void __evlist__disable(struct evlist *evlist, char *evsel_name) break; } - affinity__cleanup(&affinity); + affinity__cleanup(affinity); evlist__for_each_entry(evlist, pos) { if (evsel__strcmp(pos, evsel_name)) continue; @@ -487,12 +491,16 @@ static void __evlist__enable(struct evlist *evlist, char *evsel_name) { struct evsel *pos; struct evlist_cpu_iterator evlist_cpu_itr; - struct affinity affinity; + struct affinity saved_affinity, *affinity = NULL; - if (affinity__setup(&affinity) < 0) - return; + // See explanation in evlist__close() + if (!cpu_map__is_dummy(evlist->core.cpus)) { + if (affinity__setup(&saved_affinity) < 0) + return; + affinity = &saved_affinity; + } - evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) { + evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) { pos = evlist_cpu_itr.evsel; if (evsel__strcmp(pos, evsel_name)) continue; @@ -500,7 +508,7 @@ static void __evlist__enable(struct evlist *evlist, char *evsel_name) continue; evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx); } - affinity__cleanup(&affinity); + affinity__cleanup(affinity); evlist__for_each_entry(evlist, pos) { if (evsel__strcmp(pos, evsel_name)) continue; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2f6b18af49e5eb92cc82b80d9234b2fd40f023fc..22d3267ce29413dfd212c55b09bca871d0367d15 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1064,6 +1064,17 @@ void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_un { } +static void evsel__set_default_freq_period(struct record_opts *opts, + struct perf_event_attr *attr) +{ + if (opts->freq) { + attr->freq = 1; + attr->sample_freq = opts->freq; + } else { + attr->sample_period = opts->default_interval; + } +} + /* * The enable_on_exec/disabled value strategy: * @@ -1130,14 +1141,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, * We default some events to have a default interval. But keep * it a weak assumption overridable by the user. */ - if (!attr->sample_period) { - if (opts->freq) { - attr->freq = 1; - attr->sample_freq = opts->freq; - } else { - attr->sample_period = opts->default_interval; - } - } + if ((evsel->is_libpfm_event && !attr->sample_period) || + (!evsel->is_libpfm_event && (!attr->sample_period || + opts->user_freq != UINT_MAX || + opts->user_interval != ULLONG_MAX))) + evsel__set_default_freq_period(opts, attr); + /* * If attr->freq was set (here or earlier), ask for period * to be sampled. 
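The evsel__config() change above pulls the default frequency/period assignment into a helper and widens its trigger: a libpfm event, which may carry its own period, only receives the defaults when no period is set, while for ordinary events an explicit -F/-c on the command line now also overrides a period chosen by the event specification itself. A standalone sketch of that decision logic follows; struct opts and struct attr are simplified stand-ins (not perf's real record_opts and perf_event_attr), and the 4000 Hz value is purely illustrative:

```c
#include <limits.h>
#include <stdio.h>

struct opts {
	unsigned int freq;		  /* non-zero: sample at this many Hz */
	unsigned int user_freq;		  /* UINT_MAX unless -F was given */
	unsigned long long user_interval; /* ULLONG_MAX unless -c was given */
	unsigned long long default_interval;
};

struct attr {
	unsigned int freq;		  /* 1: sample_freq is a frequency */
	unsigned long long sample_freq;
	unsigned long long sample_period;
};

/* Mirrors the shape of evsel__set_default_freq_period() above. */
static void set_default_freq_period(const struct opts *o, struct attr *a)
{
	if (o->freq) {
		a->freq = 1;
		a->sample_freq = o->freq;
	} else {
		a->sample_period = o->default_interval;
	}
}

static void configure(const struct opts *o, struct attr *a, int is_libpfm_event)
{
	/*
	 * A libpfm event may carry its own period: only fill in defaults
	 * when none is set. For other events, an explicit -F/-c on the
	 * command line also overrides a period the event spec chose.
	 */
	if ((is_libpfm_event && !a->sample_period) ||
	    (!is_libpfm_event && (!a->sample_period ||
				  o->user_freq != UINT_MAX ||
				  o->user_interval != ULLONG_MAX)))
		set_default_freq_period(o, a);
}

int main(void)
{
	struct opts o = { .freq = 4000, .user_freq = UINT_MAX,
			  .user_interval = ULLONG_MAX };
	struct attr a = { .sample_period = 100000 }; /* set by the event spec */

	configure(&o, &a, /*is_libpfm_event=*/1);
	/* libpfm event with its own period: defaults are not applied */
	printf("freq=%u period=%llu\n", a.freq, a.sample_period);

	a = (struct attr){ 0 };
	configure(&o, &a, 0);
	/* ordinary event without a period: falls back to frequency mode */
	printf("freq=%u freq_hz=%llu\n", a.freq, a.sample_freq);
	return 0;
}
```

Routing both cases through one helper also keeps the freq-versus-period branching in a single place instead of duplicating it across the libpfm and non-libpfm paths.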
@@ -1782,7 +1791,7 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, nthreads = threads->nr; if (evsel->core.fd == NULL && - perf_evsel__alloc_fd(&evsel->core, cpus->nr, nthreads) < 0) + perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0) return -ENOMEM; evsel->open_flags = PERF_FLAG_FD_CLOEXEC; @@ -2020,9 +2029,10 @@ retry_open: test_attr__ready(); pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", - pid, cpus->map[idx].cpu, group_fd, evsel->open_flags); + pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags); - fd = sys_perf_event_open(&evsel->core.attr, pid, cpus->map[idx].cpu, + fd = sys_perf_event_open(&evsel->core.attr, pid, + perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags); FD(evsel, idx, thread) = fd; @@ -2038,7 +2048,8 @@ retry_open: bpf_counter__install_pe(evsel, idx, fd); if (unlikely(test_attr__enabled)) { - test_attr__open(&evsel->core.attr, pid, cpus->map[idx], + test_attr__open(&evsel->core.attr, pid, + perf_cpu_map__cpu(cpus, idx), fd, group_fd, evsel->open_flags); } @@ -2079,7 +2090,8 @@ try_fallback: if (evsel__precise_ip_fallback(evsel)) goto retry_open; - if (evsel__ignore_missing_thread(evsel, cpus->nr, idx, threads, thread, err)) { + if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus), + idx, threads, thread, err)) { /* We just removed 1 thread, so lower the upper nthreads limit. */ nthreads--; @@ -2119,7 +2131,7 @@ out_close: int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { - return evsel__open_cpu(evsel, cpus, threads, 0, cpus ? cpus->nr : 1); + return evsel__open_cpu(evsel, cpus, threads, 0, perf_cpu_map__nr(cpus)); } void evsel__close(struct evsel *evsel) @@ -2131,8 +2143,7 @@ void evsel__close(struct evsel *evsel) int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx) { if (cpu_map_idx == -1) - return evsel__open_cpu(evsel, cpus, NULL, 0, - cpus ? 
cpus->nr : 1); + return evsel__open_cpu(evsel, cpus, NULL, 0, perf_cpu_map__nr(cpus)); return evsel__open_cpu(evsel, cpus, NULL, cpu_map_idx, cpu_map_idx + 1); } @@ -2982,7 +2993,7 @@ int evsel__store_ids(struct evsel *evsel, struct evlist *evlist) struct perf_cpu_map *cpus = evsel->core.cpus; struct perf_thread_map *threads = evsel->core.threads; - if (perf_evsel__alloc_id(&evsel->core, cpus->nr, threads->nr)) + if (perf_evsel__alloc_id(&evsel->core, perf_cpu_map__nr(cpus), threads->nr)) return -ENOMEM; return store_evsel_ids(evsel, evlist); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 5720ceebffac05520d436d4df0f9d0ef36f7abfd..041b42d33bf5acf2d143bc3b80cfa788b7fb23f7 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -11,6 +11,7 @@ #include #include "symbol_conf.h" #include +#include struct bpf_object; struct cgroup; @@ -191,7 +192,7 @@ static inline struct perf_cpu_map *evsel__cpus(struct evsel *evsel) static inline int evsel__nr_cpus(struct evsel *evsel) { - return evsel__cpus(evsel)->nr; + return perf_cpu_map__nr(evsel__cpus(evsel)); } void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread, diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 3901440aeff926522700fde3e1b3221b050fc7e0..394550003693748a7a60105b2e0d2b3d805d6c67 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -16,6 +16,7 @@ #include "map_symbol.h" #include "branch.h" #include "mem-events.h" +#include "path.h" #include "srcline.h" #include "symbol.h" #include "sort.h" @@ -1416,7 +1417,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i struct stat st; /*sshfs might return bad dent->d_type, so we have to stat*/ - snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name); + path__join(path, sizeof(path), dir_name, dent->d_name); if (stat(path, &st)) continue; @@ -2072,6 +2073,7 @@ static void ip__resolve_ams(struct thread *thread, ams->addr = ip; ams->al_addr = al.addr; + ams->al_level = al.level; ams->ms.maps = al.maps; ams->ms.sym = al.sym; ams->ms.map = al.map; @@ -2091,6 +2093,7 @@ static void ip__resolve_data(struct thread *thread, ams->addr = addr; ams->al_addr = al.addr; + ams->al_level = al.level; ams->ms.maps = al.maps; ams->ms.sym = al.sym; ams->ms.map = al.map; diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h index 7d22ade082c8f891c5a718d1b549655621c841bb..e08817b0c30fdb6bd2294958e8942834b59ce0c4 100644 --- a/tools/perf/util/map_symbol.h +++ b/tools/perf/util/map_symbol.h @@ -18,6 +18,7 @@ struct addr_map_symbol { struct map_symbol ms; u64 addr; u64 al_addr; + char al_level; u64 phys_addr; u64 data_page_size; }; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 12261ed8c15b7c6a1e09a319eafedba9d6007a07..0e8ff8d1e206f2a2bc9793d37899acae540292de 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -250,7 +250,7 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask) nr_cpus = perf_cpu_map__nr(cpu_map); for (idx = 0; idx < nr_cpus; idx++) { - cpu = cpu_map->map[idx]; /* map c index to online cpu index */ + cpu = perf_cpu_map__cpu(cpu_map, idx); /* map c index to online cpu index */ if (cpu__get_node(cpu) == node) set_bit(cpu.cpu, mask->bits); } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index acf20ce98ce9338e97cd21ee469ae5485faa0acb..9739b05b999eb0de8fa4a28cac452881bd3f5974 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1697,6 +1697,15 @@ int 
parse_events_multi_pmu_add(struct parse_events_state *parse_state, } } } + + if (parse_state->fake_pmu) { + if (!parse_events_add_pmu(parse_state, list, str, head, + true, true)) { + pr_debug("%s -> %s/%s/\n", str, "fake_pmu", str); + ok++; + } + } + out_err: if (ok) *listp = list; @@ -2098,8 +2107,17 @@ static void perf_pmu__parse_init(void) pmu = NULL; while ((pmu = perf_pmu__scan(pmu)) != NULL) { list_for_each_entry(alias, &pmu->aliases, list) { - if (strchr(alias->name, '-')) + char *tmp = strchr(alias->name, '-'); + + if (tmp) { + char *tmp2 = NULL; + + tmp2 = strchr(tmp + 1, '-'); len++; + if (tmp2) + len++; + } + len++; } } @@ -2119,8 +2137,20 @@ static void perf_pmu__parse_init(void) list_for_each_entry(alias, &pmu->aliases, list) { struct perf_pmu_event_symbol *p = perf_pmu_events_list + len; char *tmp = strchr(alias->name, '-'); + char *tmp2 = NULL; - if (tmp != NULL) { + if (tmp) + tmp2 = strchr(tmp + 1, '-'); + if (tmp2) { + SET_SYMBOL(strndup(alias->name, tmp - alias->name), + PMU_EVENT_SYMBOL_PREFIX); + p++; + tmp++; + SET_SYMBOL(strndup(tmp, tmp2 - tmp), PMU_EVENT_SYMBOL_SUFFIX); + p++; + SET_SYMBOL(strdup(++tmp2), PMU_EVENT_SYMBOL_SUFFIX2); + len += 3; + } else if (tmp) { SET_SYMBOL(strndup(alias->name, tmp - alias->name), PMU_EVENT_SYMBOL_PREFIX); p++; @@ -2147,23 +2177,38 @@ err: */ int perf_pmu__test_parse_init(void) { - struct perf_pmu_event_symbol *list; + struct perf_pmu_event_symbol *list, *tmp, symbols[] = { + {(char *)"read", PMU_EVENT_SYMBOL}, + {(char *)"event", PMU_EVENT_SYMBOL_PREFIX}, + {(char *)"two", PMU_EVENT_SYMBOL_SUFFIX}, + {(char *)"hyphen", PMU_EVENT_SYMBOL_SUFFIX}, + {(char *)"hyph", PMU_EVENT_SYMBOL_SUFFIX2}, + }; + unsigned long i, j; - list = malloc(sizeof(*list) * 1); + tmp = list = malloc(sizeof(*list) * ARRAY_SIZE(symbols)); if (!list) return -ENOMEM; - list->type = PMU_EVENT_SYMBOL; - list->symbol = strdup("read"); - - if (!list->symbol) { - free(list); - return -ENOMEM; + for (i = 0; i < ARRAY_SIZE(symbols); i++, tmp++) { + tmp->type = symbols[i].type; + tmp->symbol = strdup(symbols[i].symbol); + if (!tmp->symbol) + goto err_free; } perf_pmu_events_list = list; - perf_pmu_events_list_num = 1; + perf_pmu_events_list_num = ARRAY_SIZE(symbols); + + qsort(perf_pmu_events_list, ARRAY_SIZE(symbols), + sizeof(struct perf_pmu_event_symbol), comp_pmu); return 0; + +err_free: + for (j = 0, tmp = list; j < i; j++, tmp++) + free(tmp->symbol); + free(list); + return -ENOMEM; } enum perf_pmu_event_symbol_type diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index c7fc93f54577098aef5b0eb5a83128c77cd87bc4..a38b8b160e80b4330364c07d73f6f5f9ff59ac20 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -53,6 +53,7 @@ enum perf_pmu_event_symbol_type { PMU_EVENT_SYMBOL, /* normal style PMU event */ PMU_EVENT_SYMBOL_PREFIX, /* prefix of pre-suf style event */ PMU_EVENT_SYMBOL_SUFFIX, /* suffix of pre-suf style event */ + PMU_EVENT_SYMBOL_SUFFIX2, /* suffix of pre-suf2 style event */ }; struct perf_pmu_event_symbol { diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 4efe9872c6678c5fe1837629a3fc3840afd039fc..5b6e4b5249cffe81bf6f274df1eef119fa14c739 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -149,6 +149,8 @@ static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_stat return PE_PMU_EVENT_PRE; case PMU_EVENT_SYMBOL_SUFFIX: return PE_PMU_EVENT_SUF; + case PMU_EVENT_SYMBOL_SUFFIX2: + return PE_PMU_EVENT_SUF2; case
PMU_EVENT_SYMBOL: return parse_state->fake_pmu ? PE_PMU_EVENT_FAKE : PE_KERNEL_PMU_EVENT; diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 174158982fae42ceab80a16a0f5e6e03a2b41636..be8c517700517ebd1d588ed900320b6a900dd839 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -69,7 +69,7 @@ static void inc_group_count(struct list_head *list, %token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT %token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP %token PE_ERROR -%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE +%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %token PE_ARRAY_ALL PE_ARRAY_RANGE %token PE_DRV_CFG_TERM %type PE_VALUE @@ -87,7 +87,7 @@ static void inc_group_count(struct list_head *list, %type PE_MODIFIER_EVENT %type PE_MODIFIER_BP %type PE_EVENT_NAME -%type PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE +%type PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_PMU_EVENT_SUF2 PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %type PE_DRV_CFG_TERM %type event_pmu_name %destructor { free ($$); } @@ -372,6 +372,19 @@ PE_KERNEL_PMU_EVENT opt_pmu_config $$ = list; } | +PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF '-' PE_PMU_EVENT_SUF2 sep_dc +{ + struct list_head *list; + char pmu_name[128]; + snprintf(pmu_name, sizeof(pmu_name), "%s-%s-%s", $1, $3, $5); + free($1); + free($3); + free($5); + if (parse_events_multi_pmu_add(_parse_state, pmu_name, NULL, &list) < 0) + YYABORT; + $$ = list; +} +| PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc { struct list_head *list; diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c index 734d006d9a8cac17c86117ded7f9bfb67e168987..c28dd50bd571b8b3190d21f7fed3dcfad4f746be 100644 --- a/tools/perf/util/perf_api_probe.c +++ b/tools/perf/util/perf_api_probe.c @@ -67,7 +67,7 @@ static bool perf_probe_api(setup_probe_fn_t fn) cpus = perf_cpu_map__new(NULL); if (!cpus) return false; - cpu = cpus->map[0]; + cpu = perf_cpu_map__cpu(cpus, 0); perf_cpu_map__put(cpus); do { @@ -144,7 +144,7 @@ bool perf_can_record_cpu_wide(void) if (!cpus) return false; - cpu = cpus->map[0]; + cpu = perf_cpu_map__cpu(cpus, 0); perf_cpu_map__put(cpus); fd = sys_perf_event_open(&attr, -1, cpu.cpu, -1, 0); diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 47b7531f51da06be80b10489804e1d4ec7a9bfdc..98af3fa4ea35351be6a10e303a228be6c2dfba7c 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -52,7 +52,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value) bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX), bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP), bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES), - bit_name(HW_INDEX), + bit_name(TYPE_SAVE), bit_name(HW_INDEX), { .name = NULL, } }; #undef bit_name diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index b2a02c9ab8ea9f3a56360b87fc0a7531f74c1e14..a834918a0a0d33dfcd9708e1c8732550729a70f8 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -3083,6 +3083,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, for (j = 0; j < num_matched_functions; j++) { sym = syms[j]; + if (sym->type != STT_FUNC) + continue; + /* There can be duplicated symbols in the map */ for (i = 0; i < j; i++) if (sym->start == syms[i]->start) { diff --git a/tools/perf/util/python.c 
b/tools/perf/util/python.c index f3e5131f183c55ced8718683be014304bbd57118..52d8995cfd73ee1ecdaefc7f98b4052766fa3f6b 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -638,17 +638,17 @@ static Py_ssize_t pyrf_cpu_map__length(PyObject *obj) { struct pyrf_cpu_map *pcpus = (void *)obj; - return pcpus->cpus->nr; + return perf_cpu_map__nr(pcpus->cpus); } static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i) { struct pyrf_cpu_map *pcpus = (void *)obj; - if (i >= pcpus->cpus->nr) + if (i >= perf_cpu_map__nr(pcpus->cpus)) return NULL; - return Py_BuildValue("i", pcpus->cpus->map[i]); + return Py_BuildValue("i", perf_cpu_map__cpu(pcpus->cpus, i).cpu); } static PySequenceMethods pyrf_cpu_map__sequence_methods = { diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 20461f17499122b68c436909cc3414965cfc8d7f..007a64681416bfce3f2d3e8f5d78d574b410c77f 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -106,7 +106,7 @@ void evlist__config(struct evlist *evlist, struct record_opts *opts, struct call if (opts->group) evlist__set_leader(evlist); - if (evlist->core.cpus->map[0].cpu < 0) + if (perf_cpu_map__cpu(evlist->core.cpus, 0).cpu < 0) opts->no_inherit = true; use_comm_exec = perf_can_comm_exec(); @@ -248,11 +248,11 @@ bool evlist__can_select_event(struct evlist *evlist, const char *str) struct perf_cpu_map *cpus = perf_cpu_map__new(NULL); if (cpus) - cpu = cpus->map[0]; + cpu = perf_cpu_map__cpu(cpus, 0); perf_cpu_map__put(cpus); } else { - cpu = evlist->core.cpus->map[0]; + cpu = perf_cpu_map__cpu(evlist->core.cpus, 0); } while (1) { diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index f5ad0e62227ae3466942945f50f3705c1720818f..e752e1f4a5f0548a540676769182060a0927217b 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1607,8 +1607,8 @@ static void python_process_stat(struct perf_stat_config *config, } for (thread = 0; thread < threads->nr; thread++) { - for (cpu = 0; cpu < cpus->nr; cpu++) { - process_stat(counter, cpus->map[cpu], + for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) { + process_stat(counter, perf_cpu_map__cpu(cpus, cpu), perf_thread_map__pid(threads, thread), tstamp, perf_counts(counter->counts, cpu, thread)); } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index f19348dddd5536da70a92917ca073e9331e5954f..498b05708db577326ff2e7a2919e59b7b02c48fa 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1503,11 +1503,12 @@ static int machines__deliver_event(struct machines *machines, ++evlist->stats.nr_unknown_id; return 0; } - dump_sample(evsel, event, sample, perf_env__arch(machine->env)); if (machine == NULL) { ++evlist->stats.nr_unprocessable_samples; + dump_sample(evsel, event, sample, perf_env__arch(NULL)); return 0; } + dump_sample(evsel, event, sample, perf_env__arch(machine->env)); return evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); @@ -2537,8 +2538,8 @@ int perf_session__cpu_bitmap(struct perf_session *session, return -1; } - for (i = 0; i < map->nr; i++) { - struct perf_cpu cpu = map->map[i]; + for (i = 0; i < perf_cpu_map__nr(map); i++) { + struct perf_cpu cpu = perf_cpu_map__cpu(map, i); if (cpu.cpu >= nr_cpus) { pr_err("Requested CPU %d too large. 
" diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index cfba8c3377835c9df1aaa9e2f2505a01a907671f..2da081ef532b2635535ab15c142151b2e2a1f798 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -915,7 +915,7 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf, struct addr_map_symbol *from = &he->branch_info->from; return _hist_entry__sym_snprintf(&from->ms, from->al_addr, - he->level, bf, size, width); + from->al_level, bf, size, width); } return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); @@ -928,7 +928,7 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf, struct addr_map_symbol *to = &he->branch_info->to; return _hist_entry__sym_snprintf(&to->ms, to->al_addr, - he->level, bf, size, width); + to->al_level, bf, size, width); } return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A"); diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 5db83e51ceefb4df6e00f35835209083272bc3a1..9cbe351b141fd09a91d454d5037a9221c741f858 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -585,15 +585,16 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c alias = list_prepare_entry(counter, &(evlist->core.entries), core.node); list_for_each_entry_continue (alias, &evlist->core.entries, core.node) { - if (strcmp(evsel__name(alias), evsel__name(counter)) || - alias->scale != counter->scale || - alias->cgrp != counter->cgrp || - strcmp(alias->unit, counter->unit) || - evsel__is_clock(alias) != evsel__is_clock(counter) || - !strcmp(alias->pmu_name, counter->pmu_name)) - break; - alias->merged_stat = true; - cb(config, alias, data, false); + /* Merge events with the same name, etc. but on different PMUs. 
*/ + if (!strcmp(evsel__name(alias), evsel__name(counter)) && + alias->scale == counter->scale && + alias->cgrp == counter->cgrp && + !strcmp(alias->unit, counter->unit) && + evsel__is_clock(alias) == evsel__is_clock(counter) && + strcmp(alias->pmu_name, counter->pmu_name)) { + alias->merged_stat = true; + cb(config, alias, data, false); + } } } diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 4c9f211249db3032308b2f93bc680f21e937ab01..1e0c731fc5396e31a1c1e315e15bb7793d0fcacb 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -734,8 +734,8 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus) if (!m) return -1; - for (i = 0; i < m->nr; i++) { - c = m->map[i]; + for (i = 0; i < perf_cpu_map__nr(m); i++) { + c = perf_cpu_map__cpu(m, i); if (c.cpu >= nr_cpus) { ret = -1; break; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index c9ba8050cc2ba80e530af901a672a77eae78b62e..b654de0841f87422cdf5f7033a3f9be96d4fc31f 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -1186,12 +1186,12 @@ int perf_event__synthesize_thread_map2(struct perf_tool *tool, static void synthesize_cpus(struct cpu_map_entries *cpus, struct perf_cpu_map *map) { - int i; + int i, map_nr = perf_cpu_map__nr(map); - cpus->nr = map->nr; + cpus->nr = map_nr; - for (i = 0; i < map->nr; i++) - cpus->cpu[i] = map->map[i].cpu; + for (i = 0; i < map_nr; i++) + cpus->cpu[i] = perf_cpu_map__cpu(map, i).cpu; } static void synthesize_mask(struct perf_record_record_cpu_map *mask, @@ -1202,13 +1202,13 @@ static void synthesize_mask(struct perf_record_record_cpu_map *mask, mask->nr = BITS_TO_LONGS(max); mask->long_size = sizeof(long); - for (i = 0; i < map->nr; i++) - set_bit(map->map[i].cpu, mask->mask); + for (i = 0; i < perf_cpu_map__nr(map); i++) + set_bit(perf_cpu_map__cpu(map, i).cpu, mask->mask); } static size_t cpus_size(struct perf_cpu_map *map) { - return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16); + return sizeof(struct cpu_map_entries) + perf_cpu_map__nr(map) * sizeof(u16); } static size_t mask_size(struct perf_cpu_map *map, int *max) @@ -1217,9 +1217,9 @@ static size_t mask_size(struct perf_cpu_map *map, int *max) *max = 0; - for (i = 0; i < map->nr; i++) { + for (i = 0; i < perf_cpu_map__nr(map); i++) { /* bit position of the cpu is + 1 */ - int bit = map->map[i].cpu + 1; + int bit = perf_cpu_map__cpu(map, i).cpu + 1; if (bit > *max) *max = bit; @@ -1784,6 +1784,25 @@ int __machine__synthesize_threads(struct machine *machine, struct perf_tool *too perf_event__handler_t process, bool needs_mmap, bool data_mmap, unsigned int nr_threads_synthesize) { + /* + * When perf runs in a non-root PID namespace, and the namespace's proc FS + * is not mounted, nsinfo__is_in_root_namespace() returns false. + * In this case, the proc FS comes from the parent namespace, and thus + * the perf tool would wrongly gather process info from its parent PID + * namespace. + * + * To avoid the confusion that the perf tool runs in a child PID + * namespace but synthesizes thread info from its parent PID + * namespace, return failure with a warning. + */ + if (!nsinfo__is_in_root_namespace()) { + pr_err("Perf runs in non-root PID namespace but it tries to "); + pr_err("gather process info from its parent PID namespace.\n"); + pr_err("Please mount the proc file system properly, e.g. 
"); + pr_err("add the option '--mount-proc' for unshare command.\n"); + return -EPERM; + } + if (target__has_task(target)) return perf_event__synthesize_thread_map(tool, threads, process, machine, needs_mmap, data_mmap); diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c index 27945eeb0cb589cb022dbedcc6630b4ec4cb227f..c1ebfc5d2e0cbb4e02a0d5e08d226180003fc0d0 100644 --- a/tools/perf/util/top.c +++ b/tools/perf/util/top.c @@ -95,15 +95,15 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size) if (target->cpu_list) ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)", - top->evlist->core.cpus->nr > 1 ? "s" : "", + perf_cpu_map__nr(top->evlist->core.cpus) > 1 ? "s" : "", target->cpu_list); else { if (target->tid) ret += SNPRINTF(bf + ret, size - ret, ")"); else ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)", - top->evlist->core.cpus->nr, - top->evlist->core.cpus->nr > 1 ? "s" : ""); + perf_cpu_map__nr(top->evlist->core.cpus), + perf_cpu_map__nr(top->evlist->core.cpus) > 1 ? "s" : ""); } perf_top__reset_sample_counters(top); diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index b0be5f40a3f11bfaa194d21a696b7bf63bff3a93..79d1023044707acb716f7dc381bd3e13f9314c7d 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -90,7 +90,7 @@ EXTRA_WARNINGS += -Wstrict-aliasing=3 else ifneq ($(CROSS_COMPILE),) CLANG_CROSS_FLAGS := --target=$(notdir $(CROSS_COMPILE:%-=%)) -GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)gcc)) +GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)gcc 2>/dev/null)) ifneq ($(GCC_TOOLCHAIN_DIR),) CLANG_CROSS_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) CLANG_CROSS_FLAGS += --sysroot=$(shell $(CROSS_COMPILE)gcc -print-sysroot) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 44bbe54f25f1a00e7790b64266bb5404dd998717..3c4196cef3ed9123344dc61908aa714f5b86cbc2 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -6,6 +6,7 @@ # Author: Felix Guo # Author: Brendan Higgins +import importlib.abc import importlib.util import logging import subprocess diff --git a/tools/testing/kunit/run_checks.py b/tools/testing/kunit/run_checks.py index 4f32133ed77c8ad2edab71492dc7d5b76ac96417..13d854afca9dd720a11f673be80bc21c334f70ea 100755 --- a/tools/testing/kunit/run_checks.py +++ b/tools/testing/kunit/run_checks.py @@ -61,7 +61,7 @@ def main(argv: Sequence[str]) -> None: elif isinstance(ex, subprocess.CalledProcessError): print(f'{name}: FAILED') else: - print('{name}: unexpected exception: {ex}') + print(f'{name}: unexpected exception: {ex}') continue output = ex.output diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h index 16ec895bbe5ff5c2be9b091fd085723d1511b887..5bd9e6e80625406bbf175161cae033ad567e5cce 100644 --- a/tools/testing/scatterlist/linux/mm.h +++ b/tools/testing/scatterlist/linux/mm.h @@ -74,7 +74,7 @@ static inline unsigned long page_to_phys(struct page *page) __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \ x, y) -#define preemptible() (1) +#define pagefault_disabled() (0) static inline void *kmap(struct page *page) { @@ -127,6 +127,7 @@ kmalloc_array(unsigned int n, unsigned int size, unsigned int flags) #define kmemleak_free(a) #define PageSlab(p) (0) +#define flush_dcache_page(p) #define MAX_ERRNO 4095 diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 
af798b9d232cc48723b88c5e2bade1fef837b13b..a3c1e67441f9ec795688b85d518f876bf30d7a4d 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -261,7 +261,7 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type) } ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, - "Set FPSIMD registers via %s\n", type->name); + "Got FPSIMD registers via %s\n", type->name); if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) goto out; @@ -557,7 +557,14 @@ static int do_parent(pid_t child) } /* prctl() flags */ - ptrace_set_get_inherit(child, &vec_types[i]); + if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) { + ptrace_set_get_inherit(child, &vec_types[i]); + } else { + ksft_test_result_skip("%s SVE_PT_VL_INHERIT set\n", + vec_types[i].name); + ksft_test_result_skip("%s SVE_PT_VL_INHERIT cleared\n", + vec_types[i].name); + } /* Step through every possible VQ */ for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c index ebf68dce550484294b3544082e489db2c5a1f463..2893e9f2f1e0a240a00aae65842c86164523743c 100644 --- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c +++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c @@ -28,7 +28,6 @@ // 5. We can read keycode from same /dev/lirc device #include -#include #include #include #include diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c index 076cf4325f7832e535458ec282951cddb38a7d11..cd4582129c7d69c1e45b6a3acbd7a5f2c6c07e67 100644 --- a/tools/testing/selftests/clone3/clone3.c +++ b/tools/testing/selftests/clone3/clone3.c @@ -126,8 +126,6 @@ static void test_clone3(uint64_t flags, size_t size, int expected, int main(int argc, char *argv[]) { - pid_t pid; - uid_t uid = getuid(); ksft_print_header(); diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh index 31f8c9a76c5ff9331c9eeb9f9c69ed4cb35f57b0..60ce18ed066607c13d8261b2a60eeb426cd75f9d 100755 --- a/tools/testing/selftests/cpufreq/main.sh +++ b/tools/testing/selftests/cpufreq/main.sh @@ -194,5 +194,5 @@ prerequisite # Run requested functions clear_dumps $OUTFILE -do_test >> $OUTFILE.txt +do_test | tee -a $OUTFILE.txt dmesg_dumps $OUTFILE diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index dd61118df66edbcde89f92a2626d2e9186122fda..12c5e27d32c1664061e042dbb38fa49cf767a05a 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -5,7 +5,7 @@ CFLAGS += -D_GNU_SOURCE TEST_PROGS := binfmt_script non-regular TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 -TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe +TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir # Makefile is a run-time dependency, since it's accessed by the execveat test TEST_FILES := Makefile diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile index 12631f0076a10274dff566228b05748e9a217be0..11e157d7533b8fbaf5ac691b3df7057e0ff558c1 100644 --- a/tools/testing/selftests/futex/Makefile +++ b/tools/testing/selftests/futex/Makefile @@ -11,7 +11,7 @@ all: @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ - make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ if [ -e 
$$DIR/$(TEST_PROGS) ]; then \ rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \ fi \ @@ -32,6 +32,6 @@ override define CLEAN @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ - make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ done endef diff --git a/tools/testing/selftests/ir/ir_loopback.c b/tools/testing/selftests/ir/ir_loopback.c index 06256c96df12e43f411a6ee5b0555fdc5d92bb2a..f4a15cbdd5ea4e2d87f3a861be340b886fd17eac 100644 --- a/tools/testing/selftests/ir/ir_loopback.c +++ b/tools/testing/selftests/ir/ir_loopback.c @@ -29,6 +29,16 @@ #define SYSFS_PATH_MAX 256 #define DNAME_PATH_MAX 256 +/* + * Support ancient lirc.h which does not have these values. Can be removed + * once RHEL 8 is no longer a relevant testing platform. + */ +#if RC_PROTO_MAX < 26 +#define RC_PROTO_RCMM12 24 +#define RC_PROTO_RCMM24 25 +#define RC_PROTO_RCMM32 26 +#endif + static const struct { enum rc_proto proto; const char *name; diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 471eaa7b3a3f20150bcb385eded0c8155d9e05be..11779405dc804d34ef8ac767ec5e22b6d2b5c0ba 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -877,7 +877,8 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) } t->timed_out = true; - kill(t->pid, SIGKILL); + // signal process group + kill(-(t->pid), SIGKILL); } void __wait_for_test(struct __test_metadata *t) @@ -987,6 +988,7 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->passed = 0; } else if (t->pid == 0) { + setpgrp(); t->fn(t, variant); if (t->skip) _exit(255); diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 8c129961accfea6f4e6c2524413704025b656d46..dce7de7755e6cebe14a14f8d7d2f34c44294c8ac 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -8,11 +8,12 @@ /s390x/memop /s390x/resets /s390x/sync_regs_test +/x86_64/amx_test +/x86_64/cpuid_test /x86_64/cr4_cpuid_sync_test /x86_64/debug_regs /x86_64/evmcs_test /x86_64/emulator_error_test -/x86_64/get_cpuid_test /x86_64/get_msr_index_features /x86_64/kvm_clock_test /x86_64/kvm_pv_test @@ -22,6 +23,7 @@ /x86_64/mmio_warning_test /x86_64/mmu_role_test /x86_64/platform_info_test +/x86_64/pmu_event_filter_test /x86_64/set_boot_cpu_id /x86_64/set_sregs_test /x86_64/sev_migrate_tests @@ -36,6 +38,7 @@ /x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test /x86_64/vmx_dirty_log_test +/x86_64/vmx_exception_with_invalid_guest_state /x86_64/vmx_invalid_nested_guest_state /x86_64/vmx_preemption_timer_test /x86_64/vmx_set_nested_state_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index ee8cf2149824ce1d1464518d224c52a6c0059c86..0e4926bc9a58dfab7a0400b9df6d823c00d73ab4 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -43,11 +43,11 @@ LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handler LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c LIBKVM_riscv = lib/riscv/processor.c lib/riscv/ucall.c -TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test +TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test 
TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test -TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features @@ -56,6 +56,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/mmu_role_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test +TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test TEST_GEN_PROGS_x86_64 += x86_64/smm_test @@ -69,6 +70,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test @@ -83,6 +85,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_pi_mmio_test TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests TEST_GEN_PROGS_x86_64 += x86_64/amx_test +TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 66775de26952d142aa19d7e0531f3985544ad4f2..4ed6aa049a91c75367375b0969e759d388e1ff07 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -345,7 +345,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, * guest_code - The vCPU's entry point */ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); -void vm_xsave_req_perm(void); bool vm_is_unrestricted_guest(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index e94ba0fc67d80ba072ee410eb2c2aec4e8897d29..8a470da7b71affacaffe44d9ffccc6947eebd67d 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -364,6 +364,24 @@ static inline unsigned long get_xmm(int n) } bool is_intel_cpu(void); +bool is_amd_cpu(void); + +static inline unsigned int x86_family(unsigned int eax) +{ + unsigned int x86; + + x86 = (eax >> 8) & 0xf; + + if (x86 == 0xf) + x86 += (eax >> 20) & 0xff; + + return x86; +} + +static inline unsigned int x86_model(unsigned int eax) +{ + return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); +} struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, @@ -375,6 +393,8 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index); struct kvm_cpuid2 *kvm_get_supported_cpuid(void); struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid); +int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_cpuid2 *cpuid); void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); @@ -418,6 +438,11 @@ uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr); void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, 
uint64_t vaddr, uint64_t pte); +/* + * get_cpuid() - find matching CPUID entry and return pointer to it. + */ +struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function, + uint32_t index); /* * set_cpuid() - overwrites a matching cpuid entry with the provided value. * matches based on ent->function && ent->index. returns true @@ -433,6 +458,7 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); +void vm_xsave_req_perm(int bit); enum x86_page_size { X86_PAGE_SIZE_4K = 0, diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 4a645dc77f3454caf1414ff9e80ac4726539566f..d8cf851ab11986419d892f1c6159c64b9bd6b9db 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -393,11 +393,6 @@ struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, struct kvm_vm *vm; int i; - /* - * Permission needs to be requested before KVM_SET_CPUID2. - */ - vm_xsave_req_perm(); - /* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */ if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES) slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES; @@ -497,9 +492,11 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, uint64_t first_page, uint32_t num_pages) { - struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot, - .first_page = first_page, - .num_pages = num_pages }; + struct kvm_clear_dirty_log args = { + .dirty_bitmap = log, .slot = slot, + .first_page = first_page, + .num_pages = num_pages + }; int ret; ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index babb0f28575c71bdad93cf8c3e1a0d40dbeadd2e..9f000dfb55949d04f547caa4ef4d13b44bd60d4c 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -665,16 +665,31 @@ static bool is_xfd_supported(void) return !!(eax & CPUID_XFD_BIT); } -void vm_xsave_req_perm(void) +void vm_xsave_req_perm(int bit) { - unsigned long bitmask; + int kvm_fd; + u64 bitmask; long rc; + struct kvm_device_attr attr = { + .group = 0, + .attr = KVM_X86_XCOMP_GUEST_SUPP, + .addr = (unsigned long) &bitmask + }; + + kvm_fd = open_kvm_dev_path_or_exit(); + rc = ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); + close(kvm_fd); + if (rc == -1 && (errno == ENXIO || errno == EINVAL)) + exit(KSFT_SKIP); + TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); + if (!(bitmask & (1ULL << bit))) + exit(KSFT_SKIP); if (!is_xfd_supported()) - return; + exit(KSFT_SKIP); + + rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit); - rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, - XSTATE_XTILE_DATA_BIT); /* * The older kernel version(<5.15) can't support * ARCH_REQ_XCOMP_GUEST_PERM and directly return. 
@@ -684,7 +699,7 @@ void vm_xsave_req_perm(void) rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); - TEST_ASSERT(bitmask & XFEATURE_XTILE_MASK, + TEST_ASSERT(bitmask & (1ULL << bit), "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx", bitmask); } @@ -886,6 +901,17 @@ kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) return entry; } + +int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_cpuid2 *cpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + return ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid); +} + /* * VM VCPU CPUID Set * @@ -903,12 +929,9 @@ kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid) { - struct vcpu *vcpu = vcpu_find(vm, vcpuid); int rc; - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid); + rc = __vcpu_set_cpuid(vm, vcpuid, cpuid); TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i", rc, errno); @@ -1136,25 +1159,25 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); list->nmsrs = nmsrs; r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i", + r); state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0])); r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i", + r); r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i", + r); r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i", + r); r = vcpu_save_xsave_state(vm, vcpu, state); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i", + r); if (kvm_check_cap(KVM_CAP_XCRS)) { r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs); @@ -1163,17 +1186,17 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) } r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i", + r); if (nested_size) { state->nested.size = sizeof(state->nested_); r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested); TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i", - r); + r); TEST_ASSERT(state->nested.size <= nested_size, - "Nested state size too big, %i (KVM_CHECK_CAP gave %i)", - state->nested.size, nested_size); + "Nested state size too big, %i (KVM_CHECK_CAP gave %i)", + state->nested.size, nested_size); } else state->nested.size = 0; @@ -1181,12 +1204,12 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) for (i = 0; i < nmsrs; i++) state->msrs.entries[i].index = list->indices[i]; r = ioctl(vcpu->fd, 
KVM_GET_MSRS, &state->msrs); - TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)", - r, r == nmsrs ? -1 : list->indices[r]); + TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)", + r, r == nmsrs ? -1 : list->indices[r]); r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i", + r); free(list); return state; @@ -1199,7 +1222,7 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", - r); + r); r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs); TEST_ASSERT(r == state->msrs.nmsrs, @@ -1214,28 +1237,28 @@ void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *s r = ioctl(vcpu->fd, KVM_SET_XSAVE, state->xsave); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", - r); + r); r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i", + r); r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i", + r); r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i", + r); r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i", - r); + TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i", + r); if (state->nested.size) { r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested); TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i", - r); + r); } } @@ -1245,10 +1268,10 @@ void kvm_x86_state_cleanup(struct kvm_x86_state *state) free(state); } -bool is_intel_cpu(void) +static bool cpu_vendor_string_is(const char *vendor) { + const uint32_t *chunk = (const uint32_t *)vendor; int eax, ebx, ecx, edx; - const uint32_t *chunk; const int leaf = 0; __asm__ __volatile__( @@ -1257,10 +1280,22 @@ bool is_intel_cpu(void) "=c"(ecx), "=d"(edx) : /* input */ "0"(leaf), "2"(0)); - chunk = (const uint32_t *)("GenuineIntel"); return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]); } +bool is_intel_cpu(void) +{ + return cpu_vendor_string_is("GenuineIntel"); +} + +/* + * Exclude early K5 samples with a vendor string of "AMDisbetter!" 
+ */ +bool is_amd_cpu(void) +{ + return cpu_vendor_string_is("AuthenticAMD"); +} + uint32_t kvm_get_cpuid_max_basic(void) { return kvm_get_supported_cpuid_entry(0)->eax; @@ -1384,6 +1419,23 @@ void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) } } +struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function, + uint32_t index) +{ + int i; + + for (i = 0; i < cpuid->nent; i++) { + struct kvm_cpuid_entry2 *cur = &cpuid->entries[i]; + + if (cur->function == function && cur->index == index) + return cur; + } + + TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index); + + return NULL; +} + bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent) { @@ -1479,22 +1531,6 @@ struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpui return cpuid; } -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541 -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163 -#define X86EMUL_CPUID_VENDOR_AuthenticAMD_edx 0x69746e65 - -static inline unsigned x86_family(unsigned int eax) -{ - unsigned int x86; - - x86 = (eax >> 8) & 0xf; - - if (x86 == 0xf) - x86 += (eax >> 20) & 0xff; - - return x86; -} - unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ @@ -1504,11 +1540,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1; /* Avoid reserved HyperTransport region on AMD processors. */ - eax = ecx = 0; - cpuid(&eax, &ebx, &ecx, &edx); - if (ebx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx || - ecx != X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx || - edx != X86EMUL_CPUID_VENDOR_AuthenticAMD_edx) + if (!is_amd_cpu()) return max_gfn; /* On parts with <40 physical address bits, the area is fully hidden */ @@ -1518,6 +1550,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) /* Before family 17h, the HyperTransport area is just below 1T. 
*/ ht_gfn = (1 << 28) - num_ht_pages; eax = 1; + ecx = 0; cpuid(&eax, &ebx, &ecx, &edx); if (x86_family(eax) < 0x17) goto done; diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index 523c1e99ed644669ca5e0ec4ce51041f2c878b3b..52a3ef6629e80610c2d9776f659869fd757c1341 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -329,6 +329,8 @@ int main(int argc, char *argv[]) u32 amx_offset; int stage, ret; + vm_xsave_req_perm(XSTATE_XTILE_DATA_BIT); + /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c similarity index 83% rename from tools/testing/selftests/kvm/x86_64/get_cpuid_test.c rename to tools/testing/selftests/kvm/x86_64/cpuid_test.c index a711f83749eab94c89ea235a963cebd0533a97ed..16d2465c56342d14de9b03d229c9176092a68064 100644 --- a/tools/testing/selftests/kvm/x86_64/get_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c @@ -154,6 +154,34 @@ struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct return guest_cpuids; } +static void set_cpuid_after_run(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid) +{ + struct kvm_cpuid_entry2 *ent; + int rc; + u32 eax, ebx, x; + + /* Setting unmodified CPUID is allowed */ + rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid); + TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc); + + /* Changing CPU features is forbidden */ + ent = get_cpuid(cpuid, 0x7, 0); + ebx = ent->ebx; + ent->ebx--; + rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid); + TEST_ASSERT(rc, "Changing CPU features should fail"); + ent->ebx = ebx; + + /* Changing MAXPHYADDR is forbidden */ + ent = get_cpuid(cpuid, 0x80000008, 0); + eax = ent->eax; + x = eax & 0xff; + ent->eax = (eax & ~0xffu) | (x - 1); + rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid); + TEST_ASSERT(rc, "Changing MAXPHYADDR should fail"); + ent->eax = eax; +} + int main(void) { struct kvm_cpuid2 *supp_cpuid, *cpuid2; @@ -175,5 +203,7 @@ int main(void) for (stage = 0; stage < 3; stage++) run_vcpu(vm, VCPU_ID, stage); + set_cpuid_after_run(vm, cpuid2); + kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c new file mode 100644 index 0000000000000000000000000000000000000000..c715adcbd4870181849d490605615519a37007fb --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test for x86 KVM_SET_PMU_EVENT_FILTER. + * + * Copyright (C) 2022, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Verifies the expected behavior of allow lists and deny lists for + * virtual PMU events. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +/* + * In lieu of copying perf_event.h into tools... 
+ */ +#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) +#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) + +union cpuid10_eax { + struct { + unsigned int version_id:8; + unsigned int num_counters:8; + unsigned int bit_width:8; + unsigned int mask_length:8; + } split; + unsigned int full; +}; + +union cpuid10_ebx { + struct { + unsigned int no_unhalted_core_cycles:1; + unsigned int no_instructions_retired:1; + unsigned int no_unhalted_reference_cycles:1; + unsigned int no_llc_reference:1; + unsigned int no_llc_misses:1; + unsigned int no_branch_instruction_retired:1; + unsigned int no_branch_misses_retired:1; + } split; + unsigned int full; +}; + +/* End of stuff taken from perf_event.h. */ + +/* Oddly, this isn't in perf_event.h. */ +#define ARCH_PERFMON_BRANCHES_RETIRED 5 + +#define VCPU_ID 0 +#define NUM_BRANCHES 42 + +/* + * This is how the event selector and unit mask are stored in an AMD + * core performance event-select register. Intel's format is similar, + * but the event selector is only 8 bits. + */ +#define EVENT(select, umask) ((select & 0xf00UL) << 24 | (select & 0xff) | \ + (umask & 0xff) << 8) + +/* + * "Branch instructions retired", from the Intel SDM, volume 3, + * "Pre-defined Architectural Performance Events." + */ + +#define INTEL_BR_RETIRED EVENT(0xc4, 0) + +/* + * "Retired branch instructions", from Processor Programming Reference + * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors, + * Preliminary Processor Programming Reference (PPR) for AMD Family + * 17h Model 31h, Revision B0 Processors, and Preliminary Processor + * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision + * B1 Processors Volume 1 of 2. + */ + +#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0) + +/* + * This event list comprises Intel's eight architectural events plus + * AMD's "retired branch instructions" for Zen[123] (and possibly + * other AMD CPUs). + */ +static const uint64_t event_list[] = { + EVENT(0x3c, 0), + EVENT(0xc0, 0), + EVENT(0x3c, 1), + EVENT(0x2e, 0x4f), + EVENT(0x2e, 0x41), + EVENT(0xc4, 0), + EVENT(0xc5, 0), + EVENT(0xa4, 1), + AMD_ZEN_BR_RETIRED, +}; + +/* + * If we encounter a #GP during the guest PMU sanity check, then the guest + * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0). + */ +static void guest_gp_handler(struct ex_regs *regs) +{ + GUEST_SYNC(0); +} + +/* + * Check that we can write a new value to the given MSR and read it back. + * The caller should provide a non-empty set of bits that are safe to flip. + * + * Return on success. GUEST_SYNC(0) on error. + */ +static void check_msr(uint32_t msr, uint64_t bits_to_flip) +{ + uint64_t v = rdmsr(msr) ^ bits_to_flip; + + wrmsr(msr, v); + if (rdmsr(msr) != v) + GUEST_SYNC(0); + + v ^= bits_to_flip; + wrmsr(msr, v); + if (rdmsr(msr) != v) + GUEST_SYNC(0); +} + +static void intel_guest_code(void) +{ + check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1); + check_msr(MSR_P6_EVNTSEL0, 0xffff); + check_msr(MSR_IA32_PMC0, 0xffff); + GUEST_SYNC(1); + + for (;;) { + uint64_t br0, br1; + + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); + wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1); + br0 = rdmsr(MSR_IA32_PMC0); + __asm__ __volatile__("loop ." 
: "+c"((int){NUM_BRANCHES})); + br1 = rdmsr(MSR_IA32_PMC0); + GUEST_SYNC(br1 - br0); + } +} + +/* + * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23], + * this code uses the always-available, legacy K7 PMU MSRs, which alias to + * the first four of the six extended core PMU MSRs. + */ +static void amd_guest_code(void) +{ + check_msr(MSR_K7_EVNTSEL0, 0xffff); + check_msr(MSR_K7_PERFCTR0, 0xffff); + GUEST_SYNC(1); + + for (;;) { + uint64_t br0, br1; + + wrmsr(MSR_K7_EVNTSEL0, 0); + wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED); + br0 = rdmsr(MSR_K7_PERFCTR0); + __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + br1 = rdmsr(MSR_K7_PERFCTR0); + GUEST_SYNC(br1 - br0); + } +} + +/* + * Run the VM to the next GUEST_SYNC(value), and return the value passed + * to the sync. Any other exit from the guest is fatal. + */ +static uint64_t run_vm_to_sync(struct kvm_vm *vm) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Exit_reason other than KVM_EXIT_IO: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + get_ucall(vm, VCPU_ID, &uc); + TEST_ASSERT(uc.cmd == UCALL_SYNC, + "Received ucall other than UCALL_SYNC: %lu", uc.cmd); + return uc.args[1]; +} + +/* + * In a nested environment or if the vPMU is disabled, the guest PMU + * might not work as architected (accessing the PMU MSRs may raise + * #GP, or writes could simply be discarded). In those situations, + * there is no point in running these tests. The guest code will perform + * a sanity check and then GUEST_SYNC(success). In the case of failure, + * the behavior of the guest on resumption is undefined. + */ +static bool sanity_check_pmu(struct kvm_vm *vm) +{ + bool success; + + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); + success = run_vm_to_sync(vm); + vm_install_exception_handler(vm, GP_VECTOR, NULL); + + return success; +} + +static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents) +{ + struct kvm_pmu_event_filter *f; + int size = sizeof(*f) + nevents * sizeof(f->events[0]); + + f = malloc(size); + TEST_ASSERT(f, "Out of memory"); + memset(f, 0, size); + f->nevents = nevents; + return f; +} + +static struct kvm_pmu_event_filter *event_filter(uint32_t action) +{ + struct kvm_pmu_event_filter *f; + int i; + + f = make_pmu_event_filter(ARRAY_SIZE(event_list)); + f->action = action; + for (i = 0; i < ARRAY_SIZE(event_list); i++) + f->events[i] = event_list[i]; + + return f; +} + +/* + * Remove the first occurrence of 'event' (if any) from the filter's + * event list. 
+ */ +static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f, + uint64_t event) +{ + bool found = false; + int i; + + for (i = 0; i < f->nevents; i++) { + if (found) + f->events[i - 1] = f->events[i]; + else + found = f->events[i] == event; + } + if (found) + f->nevents--; + return f; +} + +static void test_without_filter(struct kvm_vm *vm) +{ + uint64_t count = run_vm_to_sync(vm); + + if (count != NUM_BRANCHES) + pr_info("%s: Branch instructions retired = %lu (expected %u)\n", + __func__, count, NUM_BRANCHES); + TEST_ASSERT(count, "Allowed PMU event is not counting"); +} + +static uint64_t test_with_filter(struct kvm_vm *vm, + struct kvm_pmu_event_filter *f) +{ + vm_ioctl(vm, KVM_SET_PMU_EVENT_FILTER, (void *)f); + return run_vm_to_sync(vm); +} + +static void test_member_deny_list(struct kvm_vm *vm) +{ + struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); + uint64_t count = test_with_filter(vm, f); + + free(f); + if (count) + pr_info("%s: Branch instructions retired = %lu (expected 0)\n", + __func__, count); + TEST_ASSERT(!count, "Disallowed PMU Event is counting"); +} + +static void test_member_allow_list(struct kvm_vm *vm) +{ + struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); + uint64_t count = test_with_filter(vm, f); + + free(f); + if (count != NUM_BRANCHES) + pr_info("%s: Branch instructions retired = %lu (expected %u)\n", + __func__, count, NUM_BRANCHES); + TEST_ASSERT(count, "Allowed PMU event is not counting"); +} + +static void test_not_member_deny_list(struct kvm_vm *vm) +{ + struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); + uint64_t count; + + remove_event(f, INTEL_BR_RETIRED); + remove_event(f, AMD_ZEN_BR_RETIRED); + count = test_with_filter(vm, f); + free(f); + if (count != NUM_BRANCHES) + pr_info("%s: Branch instructions retired = %lu (expected %u)\n", + __func__, count, NUM_BRANCHES); + TEST_ASSERT(count, "Allowed PMU event is not counting"); +} + +static void test_not_member_allow_list(struct kvm_vm *vm) +{ + struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); + uint64_t count; + + remove_event(f, INTEL_BR_RETIRED); + remove_event(f, AMD_ZEN_BR_RETIRED); + count = test_with_filter(vm, f); + free(f); + if (count) + pr_info("%s: Branch instructions retired = %lu (expected 0)\n", + __func__, count); + TEST_ASSERT(!count, "Disallowed PMU Event is counting"); +} + +/* + * Check for a non-zero PMU version, at least one general-purpose + * counter per logical processor, an EBX bit vector of length greater + * than 5, and EBX[5] clear. + */ +static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry) +{ + union cpuid10_eax eax = { .full = entry->eax }; + union cpuid10_ebx ebx = { .full = entry->ebx }; + + return eax.split.version_id && eax.split.num_counters > 0 && + eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED && + !ebx.split.no_branch_instruction_retired; +} + +/* + * Note that CPUID leaf 0xa is Intel-specific. This leaf should be + * clear on AMD hardware. 
+ */ +static bool use_intel_pmu(void) +{ + struct kvm_cpuid_entry2 *entry; + + entry = kvm_get_supported_cpuid_index(0xa, 0); + return is_intel_cpu() && entry && check_intel_pmu_leaf(entry); +} + +static bool is_zen1(uint32_t eax) +{ + return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f; +} + +static bool is_zen2(uint32_t eax) +{ + return x86_family(eax) == 0x17 && + x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f; +} + +static bool is_zen3(uint32_t eax) +{ + return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f; +} + +/* + * Determining AMD support for a PMU event requires consulting the AMD + * PPR for the CPU or reference material derived therefrom. The AMD + * test code herein has been verified to work on Zen1, Zen2, and Zen3. + * + * Feel free to add more AMD CPUs that are documented to support event + * select 0xc2 umask 0 as "retired branch instructions." + */ +static bool use_amd_pmu(void) +{ + struct kvm_cpuid_entry2 *entry; + + entry = kvm_get_supported_cpuid_index(1, 0); + return is_amd_cpu() && entry && + (is_zen1(entry->eax) || + is_zen2(entry->eax) || + is_zen3(entry->eax)); +} + +int main(int argc, char *argv[]) +{ + void (*guest_code)(void) = NULL; + struct kvm_vm *vm; + int r; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + r = kvm_check_cap(KVM_CAP_PMU_EVENT_FILTER); + if (!r) { + print_skip("KVM_CAP_PMU_EVENT_FILTER not supported"); + exit(KSFT_SKIP); + } + + if (use_intel_pmu()) + guest_code = intel_guest_code; + else if (use_amd_pmu()) + guest_code = amd_guest_code; + + if (!guest_code) { + print_skip("Don't know how to test this guest PMU"); + exit(KSFT_SKIP); + } + + vm = vm_create_default(VCPU_ID, 0, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + + if (!sanity_check_pmu(vm)) { + print_skip("Guest PMU is not functional"); + exit(KSFT_SKIP); + } + + test_without_filter(vm); + test_member_deny_list(vm); + test_member_allow_list(vm); + test_not_member_deny_list(vm); + test_not_member_allow_list(vm); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index 2da8eb8e2d9685197a56c13fce830c34804e213a..a626d40fdb48940be29857b01d80820da447ff7d 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -105,7 +105,6 @@ static void guest_code(void *arg) if (cpu_has_svm()) { run_guest(svm->vmcb, svm->vmcb_gpa); - svm->vmcb->save.rip += 3; run_guest(svm->vmcb, svm->vmcb_gpa); } else { vmlaunch(); diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c index 5a6a662f2e590ec4da7959b99c2df230c7d3a071..a426078b16a3fafd10c9c30f9cef798586fcfa83 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c @@ -77,8 +77,8 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage) switch (get_ucall(vm, vcpuid, &uc)) { case UCALL_SYNC: TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && - uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx", - stage + 1, (ulong)uc.args[1]); + uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx", + stage + 1, (ulong)uc.args[1]); return; case UCALL_DONE: return; diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c index 
2835a17f1b7ad96f3690177ef27890e66555f70c..edac8839e717f533c4d6d12492a5ed1af671cf85 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c @@ -30,8 +30,8 @@ static struct kvm_vm *vm; static void l2_guest_code(void) { /* Exit to L0 */ - asm volatile("inb %%dx, %%al" - : : [port] "d" (PORT_L0_EXIT) : "rax"); + asm volatile("inb %%dx, %%al" + : : [port] "d" (PORT_L0_EXIT) : "rax"); } static void l1_guest_code(struct vmx_pages *vmx_pages) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c new file mode 100644 index 0000000000000000000000000000000000000000..27a850f3d7ce4f47afec79900f03f4b2899a25cb --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +#include <signal.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/time.h> + +#include "kselftest.h" + +#define VCPU_ID 0 + +static struct kvm_vm *vm; + +static void guest_ud_handler(struct ex_regs *regs) +{ + /* Loop on the ud2 until guest state is made invalid. */ +} + +static void guest_code(void) +{ + asm volatile("ud2"); +} + +static void __run_vcpu_with_invalid_state(void) +{ + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + + vcpu_run(vm, VCPU_ID); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR, + "Expected KVM_EXIT_INTERNAL_ERROR, got %d (%s)\n", + run->exit_reason, exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION, + "Expected emulation failure, got %d\n", + run->emulation_failure.suberror); +} + +static void run_vcpu_with_invalid_state(void) +{ + /* + * Always run twice to verify KVM handles the case where _KVM_ queues + * an exception with invalid state and then exits to userspace, i.e. + * that KVM doesn't explode if userspace ignores the initial error. + */ + __run_vcpu_with_invalid_state(); + __run_vcpu_with_invalid_state(); +} + +static void set_timer(void) +{ + struct itimerval timer; + + timer.it_value.tv_sec = 0; + timer.it_value.tv_usec = 200; + timer.it_interval = timer.it_value; + ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0); +} + +static void set_or_clear_invalid_guest_state(bool set) +{ + static struct kvm_sregs sregs; + + if (!sregs.cr0) + vcpu_sregs_get(vm, VCPU_ID, &sregs); + sregs.tr.unusable = !!set; + vcpu_sregs_set(vm, VCPU_ID, &sregs); +} + +static void set_invalid_guest_state(void) +{ + set_or_clear_invalid_guest_state(true); +} + +static void clear_invalid_guest_state(void) +{ + set_or_clear_invalid_guest_state(false); +} + +static void sigalrm_handler(int sig) +{ + struct kvm_vcpu_events events; + + TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig); + + vcpu_events_get(vm, VCPU_ID, &events); + + /* + * If an exception is pending, attempt KVM_RUN with invalid guest, + * otherwise rearm the timer and keep doing so until the timer fires + * between KVM queueing an exception and re-entering the guest. 
+ */ + if (events.exception.pending) { + set_invalid_guest_state(); + run_vcpu_with_invalid_state(); + } else { + set_timer(); + } +} + +int main(int argc, char *argv[]) +{ + if (!is_intel_cpu() || vm_is_unrestricted_guest(NULL)) { + print_skip("Must be run with kvm_intel.unrestricted_guest=0"); + exit(KSFT_SKIP); + } + + vm = vm_create_default(VCPU_ID, 0, (void *)guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + + /* + * Stuff invalid guest state for L2 by making TR unusable. The next + * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support + * emulating invalid guest state for L2. + */ + set_invalid_guest_state(); + run_vcpu_with_invalid_state(); + + /* + * Verify KVM also handles the case where userspace gains control while + * an exception is pending and stuffs invalid state. Run with valid + * guest state and a timer firing every 200us, and attempt to enter the + * guest with invalid state when the handler interrupts KVM with an + * exception pending. + */ + clear_invalid_guest_state(); + TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR, + "Failed to register SIGALRM handler, errno = %d (%s)", + errno, strerror(errno)); + + set_timer(); + run_vcpu_with_invalid_state(); +} diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 478e0ae8b93e633fef70fa3900cd85335f65737a..865e17146815a6585d801a31abf4a63c853d1dff 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -46,20 +46,20 @@ static struct kvm_vm *vm; #define MIN_STEAL_TIME 50000 struct pvclock_vcpu_time_info { - u32 version; - u32 pad0; - u64 tsc_timestamp; - u64 system_time; - u32 tsc_to_system_mul; - s8 tsc_shift; - u8 flags; - u8 pad[2]; + u32 version; + u32 pad0; + u64 tsc_timestamp; + u64 system_time; + u32 tsc_to_system_mul; + s8 tsc_shift; + u8 flags; + u8 pad[2]; } __attribute__((__packed__)); /* 32 bytes */ struct pvclock_wall_clock { - u32 version; - u32 sec; - u32 nsec; + u32 version; + u32 sec; + u32 nsec; } __attribute__((__packed__)); struct vcpu_runstate_info { @@ -74,11 +74,11 @@ struct arch_vcpu_info { }; struct vcpu_info { - uint8_t evtchn_upcall_pending; - uint8_t evtchn_upcall_mask; - unsigned long evtchn_pending_sel; - struct arch_vcpu_info arch; - struct pvclock_vcpu_time_info time; + uint8_t evtchn_upcall_pending; + uint8_t evtchn_upcall_mask; + unsigned long evtchn_pending_sel; + struct arch_vcpu_info arch; + struct pvclock_vcpu_time_info time; }; /* 64 bytes (x86) */ struct shared_info { @@ -493,7 +493,7 @@ int main(int argc, char *argv[]) vm_ts.tv_sec = wc->sec; vm_ts.tv_nsec = wc->nsec; - TEST_ASSERT(wc->version && !(wc->version & 1), + TEST_ASSERT(wc->version && !(wc->version & 1), "Bad wallclock version %x", wc->version); TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old"); TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new"); diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c index e54106643337b10f41610a863a56d84e3f8f28a9..4c88238fc8f0535a00de650e73c0ba3efd907b28 100644 --- a/tools/testing/selftests/mincore/mincore_selftest.c +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -207,15 +207,21 @@ TEST(check_file_mmap) errno = 0; fd = open(".", O_TMPFILE | O_RDWR, 0600); - ASSERT_NE(-1, fd) { - TH_LOG("Can't create temporary file: 
%s", - strerror(errno)); + if (fd < 0) { + ASSERT_EQ(errno, EOPNOTSUPP) { + TH_LOG("Can't create temporary file: %s", + strerror(errno)); + } + SKIP(goto out_free, "O_TMPFILE not supported by filesystem."); } errno = 0; retval = fallocate(fd, 0, 0, FILE_SIZE); - ASSERT_EQ(0, retval) { - TH_LOG("Error allocating space for the temporary file: %s", - strerror(errno)); + if (retval) { + ASSERT_EQ(errno, EOPNOTSUPP) { + TH_LOG("Error allocating space for the temporary file: %s", + strerror(errno)); + } + SKIP(goto out_close, "fallocate not supported by filesystem."); } /* @@ -271,7 +277,9 @@ TEST(check_file_mmap) } munmap(addr, FILE_SIZE); +out_close: close(fd); +out_free: free(vec); } diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index 8f6997d3581612ba7379349594c79e00ebaa6e36..d9d1d41901267439aac832166e46410c85f44111 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -240,11 +240,8 @@ static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, *p += sizeof(__u32); } - if (ioam6h->type.bit6) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) - return 1; + if (ioam6h->type.bit6) *p += sizeof(__u32); - } if (ioam6h->type.bit7) { if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 27d0eb9afdcaffffb8d3b893f9a88b66d79eab6d..c0801df15f547265e2f63d0625ca7f958e765572 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -75,6 +75,7 @@ init() # let $ns2 reach any $ns1 address from any interface ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i + ip -net "$ns2" route add default via dead:beef:$i::1 dev ns2eth$i metric 10$i done } @@ -1158,6 +1159,7 @@ signal_address_tests() ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr "signal addresses race test" 3 3 3 # the server will not signal the address terminating # the MPC subflow @@ -1476,7 +1478,7 @@ ipv6_tests() reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl limits 0 1 - ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow chk_join_nr "single subflow IPv6" 1 1 1 @@ -1511,7 +1513,7 @@ ipv6_tests() ip netns exec $ns1 ./pm_nl_ctl limits 0 2 ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 2 - ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow chk_join_nr "remove subflow and signal IPv6" 2 2 2 chk_add_nr 1 1 diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh index ed61f6cab60f4a933d2e8728889cafdc44d7425c..df322e47a54fb8122093dc7562176f9ad3be1e24 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -27,7 +27,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add" +BUGS="flush_remove_add 
reload" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -354,6 +354,23 @@ TYPE_flush_remove_add=" display Add two elements, flush, re-add " +TYPE_reload=" +display net,mac with reload +type_spec ipv4_addr . ether_addr +chain_spec ip daddr . ether saddr +dst addr4 +src mac +start 1 +count 1 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1473,6 +1490,59 @@ test_bug_flush_remove_add() { nft flush ruleset } +# - add ranged element, check that packets match it +# - reload the set, check packets still match +test_bug_reload() { + setup veth send_"${proto}" set || return ${KSELFTEST_SKIP} + rstart=${start} + + range_size=1 + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" || return 1 + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + # check kernel does allocate pcpu sctrach map + # for reload with no elemet add/delete + ( echo flush set inet filter test ; + nft list set inet filter test ) | nft -f - + + start=${rstart} + range_size=1 + + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do + send_match "${j}" $((j + src_delta)) || return 1 + done + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + nft flush ruleset +} + test_reported_issues() { eval test_bug_"${subtest}" } diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index 349a319a9e51bd7581bb1aea416d5a691f85c916..79fe627b9e817a087178e1efbbce443b884c0c4d 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -899,6 +899,144 @@ EOF ip netns exec "$ns0" nft delete table $family nat } +test_stateless_nat_ip() +{ + local lret=0 + + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules" + return 1 + fi + +ip netns exec "$ns0" nft -f /dev/stdin < /dev/null # ping ns2->ns1 + if [ $? -ne 0 ] ; then + echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules" + lret=1 + fi + + # ns1 should have seen packets from .2.2, due to stateless rewrite. + expect="packets 1 bytes 84" + cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect") + if [ $? -ne 0 ]; then + bad_counter "$ns1" ns0insl "$expect" "test_stateless 1" + lret=1 + fi + + for dir in "in" "out" ; do + cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect") + if [ $? 
-ne 0 ]; then
+			bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2"
+			lret=1
+		fi
+	done
+
+	# ns1 should not have seen packets from ns2, due to masquerade
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4"
+			lret=1
+		fi
+	done
+
+	reset_counters
+
+	socat -h > /dev/null 2>&1
+	if [ $? -ne 0 ];then
+		echo "SKIP: Could not run stateless nat frag test without socat tool"
+		if [ $lret -eq 0 ]; then
+			return $ksft_skip
+		fi
+
+		ip netns exec "$ns0" nft delete table ip stateless
+		return $lret
+	fi
+
+	local tmpfile=$(mktemp)
+	dd if=/dev/urandom of=$tmpfile bs=4096 count=1 2>/dev/null
+
+	local outfile=$(mktemp)
+	ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:$outfile < /dev/null &
+	sc_r=$!
+
+	sleep 1
+	# re-do with large ping -> ip fragmentation
+	ip netns exec "$ns2" timeout 3 socat - UDP4-SENDTO:"10.0.1.99:4233" < "$tmpfile" > /dev/null
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2
+		lret=1
+	fi
+
+	wait
+
+	cmp "$tmpfile" "$outfile"
+	if [ $? -ne 0 ]; then
+		ls -l "$tmpfile" "$outfile"
+		echo "ERROR: input and output file mismatch when checking udp with stateless nat" 1>&2
+		lret=1
+	fi
+
+	rm -f "$tmpfile" "$outfile"
+
+	# ns1 should have seen packets from .2.2, due to stateless rewrite.
+	expect="packets 3 bytes 4164"
+	cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
+	if [ $? -ne 0 ]; then
+		bad_counter "$ns1" ns0insl "$expect" "test_stateless 5"
+		lret=1
+	fi
+
+	ip netns exec "$ns0" nft delete table ip stateless
+	if [ $? -ne 0 ]; then
+		echo "ERROR: Could not delete table ip stateless" 1>&2
+		lret=1
+	fi
+
+	test $lret -eq 0 && echo "PASS: IP stateless for $ns2"
+
+	return $lret
+}
+
# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 for i in 0 1 2; do ip netns exec ns$i-$sfx nft -f /dev/stdin </dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345
+	dd if=/dev/zero of=/dev/stdout bs=8k count=1000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345
 	if [ $?
-ne 0 ] ;then ret=1 break @@ -86,7 +86,7 @@ EOF stop=$(date +%s%3N) local duration=$((stop-start)) - echo "PASS: added 10000 entries in $duration ms (now $i total, loop $j)" + echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)" done if [ $have_ct_tool -eq 1 ]; then @@ -128,11 +128,11 @@ test_conntrack_tool() { break fi - if [ $((i%10000)) -eq 0 ];then + if [ $((i%1000)) -eq 0 ];then stop=$(date +%s%3N) local duration=$((stop-start)) - echo "PASS: added 10000 entries in $duration ms (now $i total)" + echo "PASS: added 1000 entries in $duration ms (now $i total)" start=$stop fi done diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile index 4b93b1417b8626043f220a0ecae479bd82191165..843ba56d8e49ecf35777214fff8690b4f6c688e7 100644 --- a/tools/testing/selftests/openat2/Makefile +++ b/tools/testing/selftests/openat2/Makefile @@ -5,4 +5,4 @@ TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test include ../lib.mk -$(TEST_GEN_PROGS): helpers.c +$(TEST_GEN_PROGS): helpers.c helpers.h diff --git a/tools/testing/selftests/openat2/helpers.h b/tools/testing/selftests/openat2/helpers.h index a6ea27344db2db4aac7b2e7a676dedb4283a84ab..7056340b9339e9f4159999c2797d15f5ddf6d3fd 100644 --- a/tools/testing/selftests/openat2/helpers.h +++ b/tools/testing/selftests/openat2/helpers.h @@ -9,6 +9,7 @@ #define _GNU_SOURCE #include +#include #include #include #include "../kselftest.h" @@ -62,11 +63,12 @@ bool needs_openat2(const struct open_how *how); (similar to chroot(2)). */ #endif /* RESOLVE_IN_ROOT */ -#define E_func(func, ...) \ - do { \ - if (func(__VA_ARGS__) < 0) \ - ksft_exit_fail_msg("%s:%d %s failed\n", \ - __FILE__, __LINE__, #func);\ +#define E_func(func, ...) \ + do { \ + errno = 0; \ + if (func(__VA_ARGS__) < 0) \ + ksft_exit_fail_msg("%s:%d %s failed - errno:%d\n", \ + __FILE__, __LINE__, #func, errno); \ } while (0) #define E_asprintf(...) E_func(asprintf, __VA_ARGS__) diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c index 1bddbe934204c7b31a60e13ebd4fc1e9e3300fd9..7fb902099de45e1bb1992ffdc8c916b6e0554feb 100644 --- a/tools/testing/selftests/openat2/openat2_test.c +++ b/tools/testing/selftests/openat2/openat2_test.c @@ -259,6 +259,16 @@ void test_openat2_flags(void) unlink(path); fd = sys_openat2(AT_FDCWD, path, &test->how); + if (fd < 0 && fd == -EOPNOTSUPP) { + /* + * Skip the testcase if it failed because not supported + * by FS. (e.g. a valid O_TMPFILE combination on NFS) + */ + ksft_test_result_skip("openat2 with %s fails with %d (%s)\n", + test->name, fd, strerror(-fd)); + goto next; + } + if (test->err >= 0) failed = (fd < 0); else @@ -303,7 +313,7 @@ skip: else resultfn("openat2 with %s fails with %d (%s)\n", test->name, test->err, strerror(-test->err)); - +next: free(fdpath); fflush(stdout); } diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c index 8e83cf91513a684072bef3caa0161297cae07fa0..6d849dc2bee0b53b0b4c69004a30a4fa6411971c 100644 --- a/tools/testing/selftests/perf_events/sigtrap_threads.c +++ b/tools/testing/selftests/perf_events/sigtrap_threads.c @@ -44,9 +44,10 @@ static struct { } ctx; /* Unique value to check si_perf_data is correctly set from perf_event_attr::sig_data. 
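*/

As background for the sig_data checks in this test: the value written to
perf_event_attr::sig_data is delivered back to user space in the SIGTRAP
siginfo. A minimal sketch of the receiving side, assuming kernel and libc
headers recent enough to expose si_perf_data/si_perf_type (which this test
already relies on); install_sigtrap_handler() is a hypothetical helper and
the printf() is for illustration only (it is not async-signal-safe):

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>

static void sigtrap_handler(int sig, siginfo_t *info, void *ucontext)
{
	/* si_perf_data carries perf_event_attr::sig_data verbatim. */
	printf("si_perf_data=%#lx si_perf_type=%u\n",
	       (unsigned long)info->si_perf_data, info->si_perf_type);
}

static void install_sigtrap_handler(void)
{
	struct sigaction action = {};

	action.sa_sigaction = sigtrap_handler;
	action.sa_flags = SA_SIGINFO;
	sigaction(SIGTRAP, &action, NULL);
}

/*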
*/ -#define TEST_SIG_DATA(addr) (~(unsigned long)(addr)) +#define TEST_SIG_DATA(addr, id) (~(unsigned long)(addr) + id) -static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr) +static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr, + unsigned long id) { struct perf_event_attr attr = { .type = PERF_TYPE_BREAKPOINT, @@ -60,7 +61,7 @@ static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr) .inherit_thread = 1, /* ... but only cloned with CLONE_THREAD. */ .remove_on_exec = 1, /* Required by sigtrap. */ .sigtrap = 1, /* Request synchronous SIGTRAP on event. */ - .sig_data = TEST_SIG_DATA(addr), + .sig_data = TEST_SIG_DATA(addr, id), }; return attr; } @@ -110,7 +111,7 @@ FIXTURE(sigtrap_threads) FIXTURE_SETUP(sigtrap_threads) { - struct perf_event_attr attr = make_event_attr(false, &ctx.iterate_on); + struct perf_event_attr attr = make_event_attr(false, &ctx.iterate_on, 0); struct sigaction action = {}; int i; @@ -165,7 +166,7 @@ TEST_F(sigtrap_threads, enable_event) EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on, 0)); /* Check enabled for parent. */ ctx.iterate_on = 0; @@ -175,7 +176,7 @@ TEST_F(sigtrap_threads, enable_event) /* Test that modification propagates to all inherited events. */ TEST_F(sigtrap_threads, modify_and_enable_event) { - struct perf_event_attr new_attr = make_event_attr(true, &ctx.iterate_on); + struct perf_event_attr new_attr = make_event_attr(true, &ctx.iterate_on, 42); EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &new_attr), 0); run_test_threads(_metadata, self); @@ -184,7 +185,7 @@ TEST_F(sigtrap_threads, modify_and_enable_event) EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on, 42)); /* Check enabled for parent. 
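*/

The ioctl above is the interesting step: PERF_EVENT_IOC_MODIFY_ATTRIBUTES
installs a new perf_event_attr on a live event, and the test then verifies
that the new sig_data propagates to all inherited child events. A hedged
sketch of that step, assuming fd came from perf_event_open(2) with an event
type that permits attribute modification (hardware breakpoints do);
modify_sig_data() is a hypothetical helper, not part of this file:

#include <sys/ioctl.h>
#include <linux/perf_event.h>

static int modify_sig_data(int fd, struct perf_event_attr *attr,
			   unsigned long long new_sig_data)
{
	attr->sig_data = new_sig_data;
	/* Returns 0 on success; the change reaches inherited events too. */
	return ioctl(fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, attr);
}

/*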
*/ ctx.iterate_on = 0; @@ -204,7 +205,7 @@ TEST_F(sigtrap_threads, signal_stress) EXPECT_EQ(ctx.tids_want_signal, 0); EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on); EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT); - EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on)); + EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on, 0)); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 01f8d3c0cf2cb844ecc233e5cc1f6b7fa1a42c0f..6922d6417e1cf06094c10627e0bd491d292d6fd2 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -68,7 +68,7 @@ #define PIDFD_SKIP 3 #define PIDFD_XFAIL 4 -int wait_for_pid(pid_t pid) +static inline int wait_for_pid(pid_t pid) { int status, ret; @@ -78,13 +78,20 @@ again: if (errno == EINTR) goto again; + ksft_print_msg("waitpid returned -1, errno=%d\n", errno); return -1; } - if (!WIFEXITED(status)) + if (!WIFEXITED(status)) { + ksft_print_msg( + "waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n", + WIFSIGNALED(status), WTERMSIG(status)); return -1; + } - return WEXITSTATUS(status); + ret = WEXITSTATUS(status); + ksft_print_msg("waitpid WEXITSTATUS=%d\n", ret); + return ret; } static inline int sys_pidfd_open(pid_t pid, unsigned int flags) diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c index 22558524f71c34da86e9e0dacc5a73c523254e72..3fd8e903118f532d43a2ac251ec71531f19ca161 100644 --- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c +++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "pidfd.h" #include "../kselftest.h" @@ -80,7 +81,10 @@ static inline int error_check(struct error *err, const char *test_name) return err->code; } +#define CHILD_STACK_SIZE 8192 + struct child { + char *stack; pid_t pid; int fd; }; @@ -89,17 +93,22 @@ static struct child clone_newns(int (*fn)(void *), void *args, struct error *err) { static int flags = CLONE_PIDFD | CLONE_NEWPID | CLONE_NEWNS | SIGCHLD; - size_t stack_size = 1024; - char *stack[1024] = { 0 }; struct child ret; if (!(flags & CLONE_NEWUSER) && geteuid() != 0) flags |= CLONE_NEWUSER; + ret.stack = mmap(NULL, CHILD_STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (ret.stack == MAP_FAILED) { + error_set(err, -1, "mmap of stack failed (errno %d)", errno); + return ret; + } + #ifdef __ia64__ - ret.pid = __clone2(fn, stack, stack_size, flags, args, &ret.fd); + ret.pid = __clone2(fn, ret.stack, CHILD_STACK_SIZE, flags, args, &ret.fd); #else - ret.pid = clone(fn, stack + stack_size, flags, args, &ret.fd); + ret.pid = clone(fn, ret.stack + CHILD_STACK_SIZE, flags, args, &ret.fd); #endif if (ret.pid < 0) { @@ -129,6 +138,11 @@ static inline int child_join(struct child *child, struct error *err) else if (r > 0) error_set(err, r, "child %d reported: %d", child->pid, r); + if (munmap(child->stack, CHILD_STACK_SIZE)) { + error_set(err, -1, "munmap of child stack failed (errno %d)", errno); + r = -1; + } + return r; } diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index 529eb700ac26a39833a4e2c4c25b4caeb57af272..9a2d64901d591f877a7f9db3989df3cb01dce893 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -441,7 +441,6 @@ static void test_pidfd_poll_exec(int use_waitpid) { int pid, pidfd = 
0; int status, ret; - pthread_t t1; time_t prog_start = time(NULL); const char *test_name = "pidfd_poll check for premature notification on child thread exec"; @@ -500,13 +499,14 @@ static int child_poll_leader_exit_test(void *args) */ *child_exit_secs = time(NULL); syscall(SYS_exit, 0); + /* Never reached, but appeases compiler thinking we should return. */ + exit(0); } static void test_pidfd_poll_leader_exit(int use_waitpid) { int pid, pidfd = 0; - int status, ret; - time_t prog_start = time(NULL); + int status, ret = 0; const char *test_name = "pidfd_poll check for premature notification on non-empty" "group leader exit"; diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index be2943f072f6088ba77df89ba2b7c942b9534465..17999e082aa715525f013f194b278e1bdf6786ad 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -39,7 +39,7 @@ static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options, TEST(wait_simple) { - int pidfd = -1, status = 0; + int pidfd = -1; pid_t parent_tid = -1; struct clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), @@ -47,7 +47,6 @@ TEST(wait_simple) .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, .exit_signal = SIGCHLD, }; - int ret; pid_t pid; siginfo_t info = { .si_signo = 0, @@ -88,7 +87,7 @@ TEST(wait_simple) TEST(wait_states) { - int pidfd = -1, status = 0; + int pidfd = -1; pid_t parent_tid = -1; struct clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings index ba4d85f74cd6b9a671652d2610ac08bbd386a577..a953c96aa16e1e814867b33d06e894ffb664bb1b 100644 --- a/tools/testing/selftests/rtc/settings +++ b/tools/testing/selftests/rtc/settings @@ -1 +1 @@ -timeout=90 +timeout=180 diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c index 3d603f1394af4f80d23cfb8de723771112818d4a..883ca85424bc5363409fb438e826316c0c858b72 100644 --- a/tools/testing/selftests/vDSO/vdso_test_abi.c +++ b/tools/testing/selftests/vDSO/vdso_test_abi.c @@ -33,110 +33,114 @@ typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts); typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts); typedef time_t (*vdso_time_t)(time_t *t); -static int vdso_test_gettimeofday(void) +#define VDSO_TEST_PASS_MSG() "\n%s(): PASS\n", __func__ +#define VDSO_TEST_FAIL_MSG(x) "\n%s(): %s FAIL\n", __func__, x +#define VDSO_TEST_SKIP_MSG(x) "\n%s(): SKIP: Could not find %s\n", __func__, x + +static void vdso_test_gettimeofday(void) { /* Find gettimeofday. */ vdso_gettimeofday_t vdso_gettimeofday = (vdso_gettimeofday_t)vdso_sym(version, name[0]); if (!vdso_gettimeofday) { - printf("Could not find %s\n", name[0]); - return KSFT_SKIP; + ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[0])); + return; } struct timeval tv; long ret = vdso_gettimeofday(&tv, 0); if (ret == 0) { - printf("The time is %lld.%06lld\n", - (long long)tv.tv_sec, (long long)tv.tv_usec); + ksft_print_msg("The time is %lld.%06lld\n", + (long long)tv.tv_sec, (long long)tv.tv_usec); + ksft_test_result_pass(VDSO_TEST_PASS_MSG()); } else { - printf("%s failed\n", name[0]); - return KSFT_FAIL; + ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[0])); } - - return KSFT_PASS; } -static int vdso_test_clock_gettime(clockid_t clk_id) +static void vdso_test_clock_gettime(clockid_t clk_id) { /* Find clock_gettime. 
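*/

The pattern above (and in the remaining tests in this file) is: resolve a
symbol out of the vDSO image announced by the auxiliary vector, then call it
through a plain function pointer. A condensed sketch, assuming the
selftest's parse_vdso.c helpers are linked in (prototypes repeated below)
and hardcoding the x86 version string "LINUX_2.6" and symbol
"__vdso_gettimeofday"; the test itself picks per-architecture values from
its version/name tables:

#include <stdint.h>
#include <sys/auxv.h>
#include <sys/time.h>

/* Provided by the selftest helper parse_vdso.c. */
extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
extern void *vdso_sym(const char *version, const char *name);

typedef long (*gtod_t)(struct timeval *tv, void *tz);

static long call_vdso_gettimeofday(struct timeval *tv)
{
	gtod_t fn;

	vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
	fn = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
	if (!fn)
		return -1;	/* symbol not exported by this vDSO */
	return fn(tv, NULL);
}

/*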
*/ vdso_clock_gettime_t vdso_clock_gettime = (vdso_clock_gettime_t)vdso_sym(version, name[1]); if (!vdso_clock_gettime) { - printf("Could not find %s\n", name[1]); - return KSFT_SKIP; + ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[1])); + return; } struct timespec ts; long ret = vdso_clock_gettime(clk_id, &ts); if (ret == 0) { - printf("The time is %lld.%06lld\n", - (long long)ts.tv_sec, (long long)ts.tv_nsec); + ksft_print_msg("The time is %lld.%06lld\n", + (long long)ts.tv_sec, (long long)ts.tv_nsec); + ksft_test_result_pass(VDSO_TEST_PASS_MSG()); } else { - printf("%s failed\n", name[1]); - return KSFT_FAIL; + ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[1])); } - - return KSFT_PASS; } -static int vdso_test_time(void) +static void vdso_test_time(void) { /* Find time. */ vdso_time_t vdso_time = (vdso_time_t)vdso_sym(version, name[2]); if (!vdso_time) { - printf("Could not find %s\n", name[2]); - return KSFT_SKIP; + ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[2])); + return; } long ret = vdso_time(NULL); if (ret > 0) { - printf("The time in hours since January 1, 1970 is %lld\n", + ksft_print_msg("The time in hours since January 1, 1970 is %lld\n", (long long)(ret / 3600)); + ksft_test_result_pass(VDSO_TEST_PASS_MSG()); } else { - printf("%s failed\n", name[2]); - return KSFT_FAIL; + ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[2])); } - - return KSFT_PASS; } -static int vdso_test_clock_getres(clockid_t clk_id) +static void vdso_test_clock_getres(clockid_t clk_id) { + int clock_getres_fail = 0; + /* Find clock_getres. */ vdso_clock_getres_t vdso_clock_getres = (vdso_clock_getres_t)vdso_sym(version, name[3]); if (!vdso_clock_getres) { - printf("Could not find %s\n", name[3]); - return KSFT_SKIP; + ksft_test_result_skip(VDSO_TEST_SKIP_MSG(name[3])); + return; } struct timespec ts, sys_ts; long ret = vdso_clock_getres(clk_id, &ts); if (ret == 0) { - printf("The resolution is %lld %lld\n", - (long long)ts.tv_sec, (long long)ts.tv_nsec); + ksft_print_msg("The vdso resolution is %lld %lld\n", + (long long)ts.tv_sec, (long long)ts.tv_nsec); } else { - printf("%s failed\n", name[3]); - return KSFT_FAIL; + clock_getres_fail++; } ret = syscall(SYS_clock_getres, clk_id, &sys_ts); - if ((sys_ts.tv_sec != ts.tv_sec) || (sys_ts.tv_nsec != ts.tv_nsec)) { - printf("%s failed\n", name[3]); - return KSFT_FAIL; - } + ksft_print_msg("The syscall resolution is %lld %lld\n", + (long long)sys_ts.tv_sec, (long long)sys_ts.tv_nsec); - return KSFT_PASS; + if ((sys_ts.tv_sec != ts.tv_sec) || (sys_ts.tv_nsec != ts.tv_nsec)) + clock_getres_fail++; + + if (clock_getres_fail > 0) { + ksft_test_result_fail(VDSO_TEST_FAIL_MSG(name[3])); + } else { + ksft_test_result_pass(VDSO_TEST_PASS_MSG()); + } } const char *vdso_clock_name[12] = { @@ -158,36 +162,23 @@ const char *vdso_clock_name[12] = { * This function calls vdso_test_clock_gettime and vdso_test_clock_getres * with different values for clock_id. 
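*/

For reference, the ksft_* calls these tests now use come from the kselftest
counting API: declare a plan, report one pass/fail/skip result per planned
test, then print the totals. A minimal sketch of that skeleton (the two
result strings are placeholders; "../kselftest.h" is the header this file
already includes):

#include "../kselftest.h"

int main(void)
{
	ksft_print_header();
	ksft_set_plan(2);	/* pass, fail and skip all count toward the plan */

	ksft_test_result_pass("first check\n");
	ksft_test_result_skip("second check not supported here\n");

	ksft_print_cnts();
	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
}

/*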
*/ -static inline int vdso_test_clock(clockid_t clock_id) +static inline void vdso_test_clock(clockid_t clock_id) { - int ret0, ret1; - - ret0 = vdso_test_clock_gettime(clock_id); - /* A skipped test is considered passed */ - if (ret0 == KSFT_SKIP) - ret0 = KSFT_PASS; - - ret1 = vdso_test_clock_getres(clock_id); - /* A skipped test is considered passed */ - if (ret1 == KSFT_SKIP) - ret1 = KSFT_PASS; + ksft_print_msg("\nclock_id: %s\n", vdso_clock_name[clock_id]); - ret0 += ret1; + vdso_test_clock_gettime(clock_id); - printf("clock_id: %s", vdso_clock_name[clock_id]); - - if (ret0 > 0) - printf(" [FAIL]\n"); - else - printf(" [PASS]\n"); - - return ret0; + vdso_test_clock_getres(clock_id); } +#define VDSO_TEST_PLAN 16 + int main(int argc, char **argv) { unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); - int ret; + + ksft_print_header(); + ksft_set_plan(VDSO_TEST_PLAN); if (!sysinfo_ehdr) { printf("AT_SYSINFO_EHDR is not present!\n"); @@ -201,44 +192,42 @@ int main(int argc, char **argv) vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR)); - ret = vdso_test_gettimeofday(); + vdso_test_gettimeofday(); #if _POSIX_TIMERS > 0 #ifdef CLOCK_REALTIME - ret += vdso_test_clock(CLOCK_REALTIME); + vdso_test_clock(CLOCK_REALTIME); #endif #ifdef CLOCK_BOOTTIME - ret += vdso_test_clock(CLOCK_BOOTTIME); + vdso_test_clock(CLOCK_BOOTTIME); #endif #ifdef CLOCK_TAI - ret += vdso_test_clock(CLOCK_TAI); + vdso_test_clock(CLOCK_TAI); #endif #ifdef CLOCK_REALTIME_COARSE - ret += vdso_test_clock(CLOCK_REALTIME_COARSE); + vdso_test_clock(CLOCK_REALTIME_COARSE); #endif #ifdef CLOCK_MONOTONIC - ret += vdso_test_clock(CLOCK_MONOTONIC); + vdso_test_clock(CLOCK_MONOTONIC); #endif #ifdef CLOCK_MONOTONIC_RAW - ret += vdso_test_clock(CLOCK_MONOTONIC_RAW); + vdso_test_clock(CLOCK_MONOTONIC_RAW); #endif #ifdef CLOCK_MONOTONIC_COARSE - ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE); + vdso_test_clock(CLOCK_MONOTONIC_COARSE); #endif #endif - ret += vdso_test_time(); - - if (ret > 0) - return KSFT_FAIL; + vdso_test_time(); - return KSFT_PASS; + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; } diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index d3fd24f9fae8c89579454d6eefacf59ccbb94cb3..2f49c9af1b582449b5e06d250ae2a9b69c0a461e 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -1417,6 +1417,7 @@ static void userfaultfd_pagemap_test(unsigned int test_pgsize) static int userfaultfd_stress(void) { void *area; + char *tmp_area; unsigned long nr; struct uffdio_register uffdio_register; struct uffd_stats uffd_stats[nr_cpus]; @@ -1527,9 +1528,13 @@ static int userfaultfd_stress(void) count_verify[nr], nr); /* prepare next bounce */ - swap(area_src, area_dst); + tmp_area = area_src; + area_src = area_dst; + area_dst = tmp_area; - swap(area_src_alias, area_dst_alias); + tmp_area = area_src_alias; + area_src_alias = area_dst_alias; + area_dst_alias = tmp_area; uffd_stats_report(uffd_stats, nr_cpus); } diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh index 232e958ec454756501f2caa8eaf2133067fe10ac..b0b91d9b0dc2143cf689c9d42be8410057790215 100755 --- a/tools/testing/selftests/zram/zram.sh +++ b/tools/testing/selftests/zram/zram.sh @@ -2,9 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 TCID="zram.sh" -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - . 
./zram_lib.sh run_zram () { @@ -18,14 +15,4 @@ echo "" check_prereqs -# check zram module exists -MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko -if [ -f $MODULE_PATH ]; then - run_zram -elif [ -b /dev/zram0 ]; then - run_zram -else - echo "$TCID : No zram.ko module or /dev/zram0 device file not found" - echo "$TCID : CONFIG_ZRAM is not set" - exit $ksft_skip -fi +run_zram diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh index 114863d9fb8768c906732d7ce780e3f6ea73e44c..8f4affe34f3e4c1d379d96515506e048954be073 100755 --- a/tools/testing/selftests/zram/zram01.sh +++ b/tools/testing/selftests/zram/zram01.sh @@ -33,9 +33,7 @@ zram_algs="lzo" zram_fill_fs() { - local mem_free0=$(free -m | awk 'NR==2 {print $4}') - - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo "fill zram$i..." local b=0 while [ true ]; do @@ -45,29 +43,17 @@ zram_fill_fs() b=$(($b + 1)) done echo "zram$i can be filled with '$b' KB" - done - local mem_free1=$(free -m | awk 'NR==2 {print $4}') - local used_mem=$(($mem_free0 - $mem_free1)) + local mem_used_total=`awk '{print $3}' "/sys/block/zram$i/mm_stat"` + local v=$((100 * 1024 * $b / $mem_used_total)) + if [ "$v" -lt 100 ]; then + echo "FAIL compression ratio: 0.$v:1" + ERR_CODE=-1 + return + fi - local total_size=0 - for sm in $zram_sizes; do - local s=$(echo $sm | sed 's/M//') - total_size=$(($total_size + $s)) + echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" done - - echo "zram used ${used_mem}M, zram disk sizes ${total_size}M" - - local v=$((100 * $total_size / $used_mem)) - - if [ "$v" -lt 100 ]; then - echo "FAIL compression ratio: 0.$v:1" - ERR_CODE=-1 - zram_cleanup - return - fi - - echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" } check_prereqs @@ -81,7 +67,6 @@ zram_mount zram_fill_fs zram_cleanup -zram_unload if [ $ERR_CODE -ne 0 ]; then echo "$TCID : [FAIL]" diff --git a/tools/testing/selftests/zram/zram02.sh b/tools/testing/selftests/zram/zram02.sh index e83b404807c09a7cf2a3cdefb9ec4eb746319a36..2418b0c4ed136b3b603ade62cd2838ff61af38b8 100755 --- a/tools/testing/selftests/zram/zram02.sh +++ b/tools/testing/selftests/zram/zram02.sh @@ -36,7 +36,6 @@ zram_set_memlimit zram_makeswap zram_swapoff zram_cleanup -zram_unload if [ $ERR_CODE -ne 0 ]; then echo "$TCID : [FAIL]" diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh index 6f872f266fd1151b6771f55147d265468a3ad74d..21ec1966de76ca4047fe50eea82015bc03986c98 100755 --- a/tools/testing/selftests/zram/zram_lib.sh +++ b/tools/testing/selftests/zram/zram_lib.sh @@ -5,12 +5,17 @@ # Author: Alexey Kodanev # Modified: Naresh Kamboju -MODULE=0 dev_makeswap=-1 dev_mounted=-1 - +dev_start=0 +dev_end=-1 +module_load=-1 +sys_control=-1 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +kernel_version=`uname -r | cut -d'.' 
-f1,2` +kernel_major=${kernel_version%.*} +kernel_minor=${kernel_version#*.} trap INT @@ -25,68 +30,104 @@ check_prereqs() fi } +kernel_gte() +{ + major=${1%.*} + minor=${1#*.} + + if [ $kernel_major -gt $major ]; then + return 0 + elif [[ $kernel_major -eq $major && $kernel_minor -ge $minor ]]; then + return 0 + fi + + return 1 +} + zram_cleanup() { echo "zram cleanup" local i= - for i in $(seq 0 $dev_makeswap); do + for i in $(seq $dev_start $dev_makeswap); do swapoff /dev/zram$i done - for i in $(seq 0 $dev_mounted); do + for i in $(seq $dev_start $dev_mounted); do umount /dev/zram$i done - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo 1 > /sys/block/zram${i}/reset rm -rf zram$i done -} + if [ $sys_control -eq 1 ]; then + for i in $(seq $dev_start $dev_end); do + echo $i > /sys/class/zram-control/hot_remove + done + fi -zram_unload() -{ - if [ $MODULE -ne 0 ] ; then - echo "zram rmmod zram" + if [ $module_load -eq 1 ]; then rmmod zram > /dev/null 2>&1 fi } zram_load() { - # check zram module exists - MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko - if [ -f $MODULE_PATH ]; then - MODULE=1 - echo "create '$dev_num' zram device(s)" - modprobe zram num_devices=$dev_num - if [ $? -ne 0 ]; then - echo "failed to insert zram module" - exit 1 - fi - - dev_num_created=$(ls /dev/zram* | wc -w) + echo "create '$dev_num' zram device(s)" + + # zram module loaded, new kernel + if [ -d "/sys/class/zram-control" ]; then + echo "zram modules already loaded, kernel supports" \ + "zram-control interface" + dev_start=$(ls /dev/zram* | wc -w) + dev_end=$(($dev_start + $dev_num - 1)) + sys_control=1 + + for i in $(seq $dev_start $dev_end); do + cat /sys/class/zram-control/hot_add > /dev/null + done + + echo "all zram devices (/dev/zram$dev_start~$dev_end" \ + "successfully created" + return 0 + fi - if [ "$dev_num_created" -ne "$dev_num" ]; then - echo "unexpected num of devices: $dev_num_created" - ERR_CODE=-1 + # detect old kernel or built-in + modprobe zram num_devices=$dev_num + if [ ! -d "/sys/class/zram-control" ]; then + if grep -q '^zram' /proc/modules; then + rmmod zram > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "zram module is being used on old kernel" \ + "without zram-control interface" + exit $ksft_skip + fi else - echo "zram load module successful" + echo "test needs CONFIG_ZRAM=m on old kernel without" \ + "zram-control interface" + exit $ksft_skip fi - elif [ -b /dev/zram0 ]; then - echo "/dev/zram0 device file found: OK" - else - echo "ERROR: No zram.ko module or no /dev/zram0 device found" - echo "$TCID : CONFIG_ZRAM is not set" - exit 1 + modprobe zram num_devices=$dev_num fi + + module_load=1 + dev_end=$(($dev_num - 1)) + echo "all zram devices (/dev/zram0~$dev_end) successfully created" } zram_max_streams() { echo "set max_comp_streams to zram device(s)" - local i=0 + kernel_gte 4.7 + if [ $? 
-eq 0 ]; then + echo "The device attribute max_comp_streams was"\ + "deprecated in 4.7" + return 0 + fi + + local i=$dev_start for max_s in $zram_max_streams; do local sys_path="/sys/block/zram${i}/max_comp_streams" echo $max_s > $sys_path || \ @@ -98,7 +139,7 @@ zram_max_streams() echo "FAIL can't set max_streams '$max_s', get $max_stream" i=$(($i + 1)) - echo "$sys_path = '$max_streams' ($i/$dev_num)" + echo "$sys_path = '$max_streams'" done echo "zram max streams: OK" @@ -108,15 +149,16 @@ zram_compress_alg() { echo "test that we can set compression algorithm" - local algs=$(cat /sys/block/zram0/comp_algorithm) + local i=$dev_start + local algs=$(cat /sys/block/zram${i}/comp_algorithm) echo "supported algs: $algs" - local i=0 + for alg in $zram_algs; do local sys_path="/sys/block/zram${i}/comp_algorithm" echo "$alg" > $sys_path || \ echo "FAIL can't set '$alg' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$alg' ($i/$dev_num)" + echo "$sys_path = '$alg'" done echo "zram set compression algorithm: OK" @@ -125,14 +167,14 @@ zram_compress_alg() zram_set_disksizes() { echo "set disk size to zram device(s)" - local i=0 + local i=$dev_start for ds in $zram_sizes; do local sys_path="/sys/block/zram${i}/disksize" echo "$ds" > $sys_path || \ echo "FAIL can't set '$ds' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$ds' ($i/$dev_num)" + echo "$sys_path = '$ds'" done echo "zram set disksizes: OK" @@ -142,14 +184,14 @@ zram_set_memlimit() { echo "set memory limit to zram device(s)" - local i=0 + local i=$dev_start for ds in $zram_mem_limits; do local sys_path="/sys/block/zram${i}/mem_limit" echo "$ds" > $sys_path || \ echo "FAIL can't set '$ds' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$ds' ($i/$dev_num)" + echo "$sys_path = '$ds'" done echo "zram set memory limit: OK" @@ -158,8 +200,8 @@ zram_set_memlimit() zram_makeswap() { echo "make swap with zram device(s)" - local i=0 - for i in $(seq 0 $(($dev_num - 1))); do + local i=$dev_start + for i in $(seq $dev_start $dev_end); do mkswap /dev/zram$i > err.log 2>&1 if [ $? -ne 0 ]; then cat err.log @@ -182,7 +224,7 @@ zram_makeswap() zram_swapoff() { local i= - for i in $(seq 0 $dev_makeswap); do + for i in $(seq $dev_start $dev_end); do swapoff /dev/zram$i > err.log 2>&1 if [ $? 
-ne 0 ]; then cat err.log @@ -196,7 +238,7 @@ zram_swapoff() zram_makefs() { - local i=0 + local i=$dev_start for fs in $zram_filesystems; do # if requested fs not supported default it to ext2 which mkfs.$fs > /dev/null 2>&1 || fs=ext2 @@ -215,7 +257,7 @@ zram_makefs() zram_mount() { local i=0 - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo "mount /dev/zram$i" mkdir zram$i mount /dev/zram$i zram$i > /dev/null || \ diff --git a/tools/tracing/Makefile b/tools/tracing/Makefile index 87e0ec48e2e7d0da01d1c2a3f09db04bcf668e92..95e485f12d97f3c4466a90a4f088f05608a78f40 100644 --- a/tools/tracing/Makefile +++ b/tools/tracing/Makefile @@ -1,11 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 include ../scripts/Makefile.include -all: latency +all: latency rtla -clean: latency_clean +clean: latency_clean rtla_clean -install: latency_install +install: latency_install rtla_install latency: $(call descend,latency) @@ -16,4 +16,14 @@ latency_install: latency_clean: $(call descend,latency,clean) -.PHONY: all install clean latency latency_install latency_clean +rtla: + $(call descend,rtla) + +rtla_install: + $(call descend,rtla,install) + +rtla_clean: + $(call descend,rtla,clean) + +.PHONY: all install clean latency latency_install latency_clean \ + rtla rtla_install rtla_clean diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index 2d52ff0bff7dadbd1154ed58395e9dd9922cca31..5a1eda61799246aadbe604c5a742529e97dfc999 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -1,5 +1,6 @@ NAME := rtla -VERSION := 0.5 +# Follow the kernel version +VERSION := $(shell cat VERSION 2> /dev/null || make -sC ../../.. kernelversion) # From libtracefs: # Makefiles suck: This macro sets a default value of $(2) for the @@ -59,7 +60,7 @@ endif .PHONY: all all: rtla -rtla: $(OBJ) doc +rtla: $(OBJ) $(CC) -o rtla $(LDFLAGS) $(OBJ) $(LIBS) static: $(OBJ) @@ -85,6 +86,7 @@ clean: doc_clean tarball: clean rm -rf $(NAME)-$(VERSION) && mkdir $(NAME)-$(VERSION) + echo $(VERSION) > $(NAME)-$(VERSION)/VERSION cp -r $(DIRS) $(FILES) $(NAME)-$(VERSION) mkdir $(NAME)-$(VERSION)/Documentation/ cp -rp $(SRCTREE)/../../../Documentation/tools/rtla/* $(NAME)-$(VERSION)/Documentation/ diff --git a/tools/tracing/rtla/src/osnoise.c b/tools/tracing/rtla/src/osnoise.c index 7b73d1eccd0e3d373e56b42ba873e87a7a95ce22..5648f9252e580870a4b6fcc72ff09debb34405f8 100644 --- a/tools/tracing/rtla/src/osnoise.c +++ b/tools/tracing/rtla/src/osnoise.c @@ -750,6 +750,9 @@ void osnoise_put_context(struct osnoise_context *context) */ void osnoise_destroy_tool(struct osnoise_tool *top) { + if (!top) + return; + trace_instance_destroy(&top->trace); if (top->context) diff --git a/tools/tracing/rtla/src/osnoise_hist.c b/tools/tracing/rtla/src/osnoise_hist.c index 180fcbe423cd1dd3b9d94d2a5e387fbd7a7ad365..1f0b7fce55cf68d3a2e1652ebf2c9bb9ba6b9070 100644 --- a/tools/tracing/rtla/src/osnoise_hist.c +++ b/tools/tracing/rtla/src/osnoise_hist.c @@ -701,9 +701,9 @@ osnoise_hist_set_signals(struct osnoise_hist_params *params) int osnoise_hist_main(int argc, char *argv[]) { struct osnoise_hist_params *params; + struct osnoise_tool *record = NULL; + struct osnoise_tool *tool = NULL; struct trace_instance *trace; - struct osnoise_tool *record; - struct osnoise_tool *tool; int return_value = 1; int retval; @@ -792,9 +792,8 @@ int osnoise_hist_main(int argc, char *argv[]) out_hist: osnoise_free_histogram(tool->data); out_destroy: + osnoise_destroy_tool(record); osnoise_destroy_tool(tool); - if 
(params->trace_output) - osnoise_destroy_tool(record); free(params); out_exit: exit(return_value); diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 332b2ac205fc4383a820a4e40b58916a3127e34c..c67dc28ef716af5e68769e51829a3908cfae7030 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -483,9 +483,9 @@ static void osnoise_top_set_signals(struct osnoise_top_params *params) int osnoise_top_main(int argc, char **argv) { struct osnoise_top_params *params; + struct osnoise_tool *record = NULL; + struct osnoise_tool *tool = NULL; struct trace_instance *trace; - struct osnoise_tool *record; - struct osnoise_tool *tool; int return_value = 1; int retval; @@ -571,9 +571,8 @@ int osnoise_top_main(int argc, char **argv) out_top: osnoise_free_top(tool->data); + osnoise_destroy_tool(record); osnoise_destroy_tool(tool); - if (params->trace_output) - osnoise_destroy_tool(record); out_exit: exit(return_value); } diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index 235f9620ef3d6ccb4663f0ef2285d31a00a24cfa..436a799f9adf9fad3a436578e4fe3eb09d29aa9e 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -729,9 +729,9 @@ timerlat_hist_set_signals(struct timerlat_hist_params *params) int timerlat_hist_main(int argc, char *argv[]) { struct timerlat_hist_params *params; + struct osnoise_tool *record = NULL; + struct osnoise_tool *tool = NULL; struct trace_instance *trace; - struct osnoise_tool *record; - struct osnoise_tool *tool; int return_value = 1; int retval; @@ -813,9 +813,8 @@ int timerlat_hist_main(int argc, char *argv[]) out_hist: timerlat_free_histogram(tool->data); + osnoise_destroy_tool(record); osnoise_destroy_tool(tool); - if (params->trace_output) - osnoise_destroy_tool(record); free(params); out_exit: exit(return_value); diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 1ebd5291539cb5ea8221cf71b8b0a84d6b2551d7..d4187f6534ed880c64939079d3c50617369c1997 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -521,9 +521,9 @@ timerlat_top_set_signals(struct timerlat_top_params *params) int timerlat_top_main(int argc, char *argv[]) { struct timerlat_top_params *params; + struct osnoise_tool *record = NULL; + struct osnoise_tool *top = NULL; struct trace_instance *trace; - struct osnoise_tool *record; - struct osnoise_tool *top; int return_value = 1; int retval; @@ -609,9 +609,8 @@ int timerlat_top_main(int argc, char *argv[]) out_top: timerlat_free_top(top->data); + osnoise_destroy_tool(record); osnoise_destroy_tool(top); - if (params->trace_output) - osnoise_destroy_tool(record); free(params); out_exit: exit(return_value); diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c index 107a0c6387f7f19752b08d26f501ffcdedce8475..83de259abcc1eff0d6a3169ba55e056d411106bb 100644 --- a/tools/tracing/rtla/src/trace.c +++ b/tools/tracing/rtla/src/trace.c @@ -20,14 +20,14 @@ int enable_tracer_by_name(struct tracefs_instance *inst, const char *tracer_name tracer = TRACEFS_TRACER_CUSTOM; - debug_msg("enabling %s tracer\n", tracer_name); + debug_msg("Enabling %s tracer\n", tracer_name); retval = tracefs_tracer_set(inst, tracer, tracer_name); if (retval < 0) { if (errno == ENODEV) - err_msg("tracer %s not found!\n", tracer_name); + err_msg("Tracer %s not found!\n", tracer_name); - err_msg("failed to enable the tracer %s\n", 
tracer_name); + err_msg("Failed to enable the %s tracer\n", tracer_name); return -1; } @@ -44,7 +44,7 @@ void disable_tracer(struct tracefs_instance *inst) retval = tracefs_tracer_set(inst, t); if (retval < 0) - err_msg("oops, error disabling tracer\n"); + err_msg("Oops, error disabling tracer\n"); } /* diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index 1c9f0eea616699efd026785cc56867a7ea66d183..ffaf8ec84001e1bb8cbbba87bc835133ffde66a8 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -77,11 +77,11 @@ void get_duration(time_t start_time, char *output, int output_size) time_t duration; duration = difftime(now, start_time); - tm_info = localtime(&duration); + tm_info = gmtime(&duration); snprintf(output, output_size, "%3d %02d:%02d:%02d", tm_info->tm_yday, - tm_info->tm_hour - 1, + tm_info->tm_hour, tm_info->tm_min, tm_info->tm_sec); } diff --git a/usr/include/Makefile b/usr/include/Makefile index 94403806ea568beb0194af7cdb0894c05968aab5..83822c33e9e70b806c807f046e7600ef17fb8c3e 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -28,13 +28,13 @@ no-header-test += linux/am437x-vpfe.h no-header-test += linux/android/binder.h no-header-test += linux/android/binderfs.h no-header-test += linux/coda.h +no-header-test += linux/cyclades.h no-header-test += linux/errqueue.h no-header-test += linux/fsmap.h no-header-test += linux/hdlc/ioctl.h no-header-test += linux/ivtv.h no-header-test += linux/kexec.h no-header-test += linux/matroxfb.h -no-header-test += linux/nfc.h no-header-test += linux/omap3isp.h no-header-test += linux/omapfb.h no-header-test += linux/patchkey.h diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 2ad013b8bde96adcd7d2091f3c9fe65864764d03..59b1dd4a549ee041767628b4a3c1f2d7992e7abb 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -463,8 +463,8 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) idx = srcu_read_lock(&kvm->irq_srcu); gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) + hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list, + link, srcu_read_lock_held(&kvm->irq_srcu)) if (kian->gsi == gsi) { srcu_read_unlock(&kvm->irq_srcu, idx); return true; @@ -480,8 +480,8 @@ void kvm_notify_acked_gsi(struct kvm *kvm, int gsi) { struct kvm_irq_ack_notifier *kian; - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) + hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list, + link, srcu_read_lock_held(&kvm->irq_srcu)) if (kian->gsi == gsi) kian->irq_acked(kian); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 504158f0e1314a3566427aa4ad3c6dbfbd3426b1..58d31da8a2f7c2eac20f64da3952f43614592341 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -427,9 +427,6 @@ static void kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) #endif kvm_async_pf_vcpu_init(vcpu); - vcpu->pre_pcpu = -1; - INIT_LIST_HEAD(&vcpu->blocked_vcpu_list); - kvm_vcpu_set_in_spin_loop(vcpu, false); kvm_vcpu_set_dy_eligible(vcpu, false); vcpu->preempted = false; @@ -2251,7 +2248,6 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn return NULL; } -EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { @@ -2466,9 +2462,8 @@ static int kvm_try_get_pfn(kvm_pfn_t pfn) } static int hva_to_pfn_remapped(struct vm_area_struct *vma, - unsigned long addr, bool *async, - bool write_fault, 
bool *writable, - kvm_pfn_t *p_pfn) + unsigned long addr, bool write_fault, + bool *writable, kvm_pfn_t *p_pfn) { kvm_pfn_t pfn; pte_t *ptep; @@ -2578,7 +2573,7 @@ retry: if (vma == NULL) pfn = KVM_PFN_ERR_FAULT; else if (vma->vm_flags & (VM_IO | VM_PFNMAP)) { - r = hva_to_pfn_remapped(vma, addr, async, write_fault, writable, &pfn); + r = hva_to_pfn_remapped(vma, addr, write_fault, writable, &pfn); if (r == -EAGAIN) goto retry; if (r < 0) @@ -3163,8 +3158,10 @@ void mark_page_dirty_in_slot(struct kvm *kvm, { struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); +#ifdef CONFIG_HAVE_KVM_DIRTY_RING if (WARN_ON_ONCE(!vcpu) || WARN_ON_ONCE(vcpu->kvm != kvm)) return; +#endif if (memslot && kvm_slot_dirty_track_enabled(memslot)) { unsigned long rel_gfn = gfn - memslot->base_gfn;
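
A closing note on the rtla get_duration() change above: a duration is an
elapsed-seconds count, not a wall-clock time, so decomposing it with
localtime() wrongly applies the local timezone offset (which the old
"tm_hour - 1" tried to compensate for), while gmtime() decomposes the raw
count directly. A standalone sketch of the corrected pattern, assuming
durations under one year; format_duration() is a hypothetical name:

#include <stdio.h>
#include <time.h>

static void format_duration(time_t start_time, char *buf, int len)
{
	time_t duration = (time_t)difftime(time(NULL), start_time);
	/* gmtime() applies no timezone shift, so tm_yday/tm_hour/tm_min/
	 * tm_sec decompose the elapsed seconds directly. */
	struct tm *tm_info = gmtime(&duration);

	snprintf(buf, len, "%3d %02d:%02d:%02d",
		 tm_info->tm_yday,
		 tm_info->tm_hour,
		 tm_info->tm_min,
		 tm_info->tm_sec);
}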